Commit 4bb6d7fe authored by Paul Chaignon, committed by Sasha Goldshtein

u* tools: automatically detect the language (#1067)

* cc: bcc function to detect the language of a process

bcc_procutils_language looks into /proc/$pid/cmdline, /proc/$pid/exe,
and /proc/$pid/maps to determine the language.
Python wrapper takes a list of candidate languages; if the detected
language is not part of the list, None is returned.

* u* tools: automatically detect the language

Uses the detect_language bcc helper. The -l switch can override the
detected language. In uthreads and ucalls, the language can be
overridden with 'none' to trace pthreads and syscalls respectively.

All tools use the -l switch to set the language, for consistency.
parent 00f662db
......@@ -485,3 +485,59 @@ bool bcc_procutils_exit_mountns(struct ns_cookie *nc) {
return rc;
}
/* Detects the following languages + C. */
const char *languages[] = {"java", "python", "ruby", "php", "node"};
const char *language_c = "c";
const int nb_languages = 5;

/*
 * Guess the language runtime of process `pid` from /proc.
 *
 * Heuristics, in order:
 *   1. the resolved path of /proc/<pid>/exe contains a known language name;
 *   2. a mapping in /proc/<pid>/maps contains "/lib<language>";
 *   3. otherwise, if a mapping named "libc-..." or "libc...." was seen,
 *      the process is reported as C.
 *
 * Returns a pointer to a static string (one of `languages` or `language_c`)
 * that the caller must not free, or NULL when nothing matched or the maps
 * file could not be opened.
 */
const char *bcc_procutils_language(int pid) {
  /* 32 bytes hold "/proc/<any int>/maps": 6 + 11 + 5 + NUL = 23. */
  char procfilename[32], line[4096], pathname[32], *str;
  FILE *procfile;
  int i, ret;
  bool libc = false;

  /* Look for clues in the absolute path to the executable. */
  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/exe", (long)pid);
  /* NOTE(review): realpath() expects a PATH_MAX-sized buffer; `line` is
   * 4096 bytes, which presumably matches PATH_MAX on Linux -- confirm. */
  if (realpath(procfilename, line)) {
    for (i = 0; i < nb_languages; i++)
      if (strstr(line, languages[i]))
        return languages[i];
  }

  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/maps", (long)pid);
  procfile = fopen(procfilename, "r");
  if (!procfile)
    return NULL;

  /* Look for clues in memory mappings. */
  do {
    char perm[8], dev[16];
    unsigned long long begin, end, size;
    long long inode;

    /* Field widths keep %s from writing past perm/dev. */
    ret = fscanf(procfile, "%llx-%llx %7s %llx %15s %lld", &begin, &end, perm,
                 &size, dev, &inode);
    /* The rest of the line is the (optional) mapping name. */
    if (!fgets(line, sizeof(line), procfile))
      break;
    if (ret == 6) {
      char *mapname = line;
      char *newline = strchr(line, '\n');
      if (newline)
        newline[0] = '\0';
      while (isspace((unsigned char)mapname[0])) mapname++;

      for (i = 0; i < nb_languages; i++) {
        snprintf(pathname, sizeof(pathname), "/lib%s", languages[i]);
        if (strstr(mapname, pathname)) {
          /* Close the stream before the early return to avoid leaking it. */
          fclose(procfile);
          return languages[i];
        }
      }

      /* Does not depend on the language, so check once per mapping. */
      if ((str = strstr(mapname, "libc")) &&
          (str[4] == '-' || str[4] == '.'))
        libc = true;
    }
  } while (ret && ret != EOF);

  fclose(procfile);

  /* Return C as the language if libc was found and nothing else. */
  return libc ? language_c : NULL;
}
......@@ -41,6 +41,7 @@ int bcc_procutils_each_ksym(bcc_procutils_ksymcb callback, void *payload);
void bcc_procutils_free(const char *ptr);
bool bcc_procutils_enter_mountns(int pid, struct ns_cookie *nc);
bool bcc_procutils_exit_mountns(struct ns_cookie *nc);
const char *bcc_procutils_language(int pid);
#ifdef __cplusplus
}
......
......@@ -134,6 +134,8 @@ lib.bcc_procutils_which_so.restype = ct.POINTER(ct.c_char)
lib.bcc_procutils_which_so.argtypes = [ct.c_char_p, ct.c_int]
lib.bcc_procutils_free.restype = None
lib.bcc_procutils_free.argtypes = [ct.c_void_p]
lib.bcc_procutils_language.restype = ct.POINTER(ct.c_char)
lib.bcc_procutils_language.argtypes = [ct.c_int]
lib.bcc_resolve_symname.restype = ct.c_int
lib.bcc_resolve_symname.argtypes = [
......
......@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ctypes as ct
from .libbcc import lib
def _read_cpu_range(path):
cpus = []
......@@ -31,3 +34,8 @@ def get_online_cpus():
def get_possible_cpus():
return _read_cpu_range('/sys/devices/system/cpu/possible')
def detect_language(candidates, pid):
    """Return the detected language of process *pid*, or None.

    Calls the ``bcc_procutils_language`` helper from libbcc and keeps its
    answer only when it is one of *candidates*.

    Args:
        candidates: container of language names the caller can handle.
        pid: id of the process to inspect.

    Returns:
        The detected language name as ``str`` if it is in *candidates*;
        ``None`` if it is not, or if the helper returned a NULL pointer.
    """
    res = lib.bcc_procutils_language(pid)
    # A NULL pointer comes back as None from c_char_p.value; calling
    # .decode() on it would raise AttributeError instead of returning None.
    raw = ct.cast(res, ct.c_char_p).value
    if raw is None:
        return None
    language = raw.decode()
    return language if language in candidates else None
......@@ -36,6 +36,12 @@ using namespace std;
static pid_t spawn_child(void *, bool, bool, int (*)(void *));
// The test binary itself should be reported as a C process.
TEST_CASE("language detection", "[c_api]") {
  const char *detected = bcc_procutils_language(getpid());
  REQUIRE(detected != nullptr);
  REQUIRE(string(detected) == "c");
}
TEST_CASE("shared object resolution", "[c_api]") {
char *libm = bcc_procutils_which_so("m", 0);
REQUIRE(libm);
......
......@@ -311,14 +311,14 @@ class SmokeTests(TestCase):
def test_ucalls(self):
# This attaches a large number (300+) kprobes, which can be slow,
# so use an increased timeout value.
self.run_with_int("ucalls.py -S %d" % os.getpid(),
self.run_with_int("ucalls.py -l none -S %d" % os.getpid(),
timeout=30, kill_timeout=30)
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uflow(self):
# The Python installed on the Ubuntu buildbot doesn't have USDT
# probes, so we can't run uflow.
# self.run_with_int("uflow.py python %d" % os.getpid())
# self.run_with_int("uflow.py -l python %d" % os.getpid())
pass
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
......@@ -329,7 +329,7 @@ class SmokeTests(TestCase):
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_uobjnew(self):
self.run_with_int("uobjnew.py c %d" % os.getpid())
self.run_with_int("uobjnew.py -l c %d" % os.getpid())
@skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
def test_ustat(self):
......
......@@ -2,9 +2,10 @@
# Copyright (c) Catalysts GmbH
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc.utils import get_online_cpus
from bcc.utils import get_online_cpus, detect_language
import multiprocessing
import unittest
import os
class TestUtils(unittest.TestCase):
def test_get_online_cpus(self):
......@@ -13,6 +14,10 @@ class TestUtils(unittest.TestCase):
self.assertEqual(len(online_cpus), num_cores)
def test_detect_language(self):
    """The running test process should be detected as Python."""
    detected = detect_language(
        ["java", "ruby", "php", "node", "c", "python"], os.getpid())
    self.assertEqual(detected, "python")
if __name__ == "__main__":
unittest.main()
......@@ -14,8 +14,11 @@
from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
from time import sleep
import os
languages = ["java", "python", "ruby", "php"]
examples = """examples:
./ucalls -l java 185 # trace Java calls and print statistics on ^C
......@@ -34,8 +37,7 @@ parser = argparse.ArgumentParser(
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("interval", type=int, nargs='?',
help="print every specified number of seconds")
parser.add_argument("-l", "--language",
choices=["java", "python", "ruby", "php"],
parser.add_argument("-l", "--language", choices=languages + ["none"],
help="language to trace (if none, trace syscalls only)")
parser.add_argument("-T", "--top", type=int,
help="number of most frequent/slow calls to print")
......@@ -49,10 +51,14 @@ parser.add_argument("-m", "--milliseconds", action="store_true",
help="report times in milliseconds (default is microseconds)")
args = parser.parse_args()
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
# We assume that the entry and return probes have the same arguments. This is
# the case for Java, Python, Ruby, and PHP. If there's a language where it's
# not the case, we will need to build a custom correlator from entry to exit.
if args.language == "java":
if language == "java":
# TODO for JVM entries, we actually have the real length of the class
# and method strings in arg3 and arg5 respectively, so we can insert
# the null terminator in its proper position.
......@@ -60,27 +66,29 @@ if args.language == "java":
return_probe = "method__return"
read_class = "bpf_usdt_readarg(2, ctx, &clazz);"
read_method = "bpf_usdt_readarg(4, ctx, &method);"
elif args.language == "python":
elif language == "python":
entry_probe = "function__entry"
return_probe = "function__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);" # filename really
read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "ruby":
elif language == "ruby":
# TODO Also probe cmethod__entry and cmethod__return with same arguments
entry_probe = "method__entry"
return_probe = "method__return"
read_class = "bpf_usdt_readarg(1, ctx, &clazz);"
read_method = "bpf_usdt_readarg(2, ctx, &method);"
elif args.language == "php":
elif language == "php":
entry_probe = "function__entry"
return_probe = "function__return"
read_class = "bpf_usdt_readarg(4, ctx, &clazz);"
read_method = "bpf_usdt_readarg(1, ctx, &method);"
elif not args.language:
elif not language or language == "none":
if not args.syscalls:
print("Nothing to do; use -S to trace syscalls.")
exit(1)
entry_probe, return_probe, read_class, read_method = ("", "", "", "")
if language:
language = None
program = """
#include <linux/ptrace.h>
......@@ -213,11 +221,11 @@ int syscall_return(struct pt_regs *ctx) {
""".replace("READ_CLASS", read_class) \
.replace("READ_METHOD", read_method) \
.replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \
.replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \
.replace("DEFINE_NOLANG", "#define NOLANG" if not language else "") \
.replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \
.replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "")
if args.language:
if language:
usdt = USDT(pid=args.pid)
usdt.enable_probe_or_bail(entry_probe, "trace_entry")
if args.latency:
......@@ -278,7 +286,7 @@ def clear_data():
exit_signaled = False
print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none"))
(args.pid, language or "none"))
while True:
try:
sleep(args.interval or 99999999)
......
......@@ -12,7 +12,7 @@ argdist, biotop, fileslower, and others.
For example, to trace method call latency in a Java application:
# ucalls -L -l java $(pidof java)
# ucalls -L $(pidof java)
Tracing calls in process 26877 (language: java)... Ctrl-C to quit.
METHOD # CALLS TIME (us)
......@@ -48,7 +48,7 @@ Detaching kernel probes, please wait...
To print only the top 5 methods and report times in milliseconds (the default
is microseconds):
# ucalls -l python -mT 5 $(pidof python)
# ucalls -mT 5 $(pidof python)
Tracing calls in process 26914 (language: python)... Ctrl-C to quit.
METHOD # CALLS
......@@ -60,7 +60,8 @@ METHOD # CALLS
USAGE message:
# ./ucalls.py -h
usage: ucalls.py [-h] [-l {java,python,ruby,php}] [-T TOP] [-L] [-S] [-v] [-m]
usage: ucalls.py [-h] [-l {java,python,ruby,php,none}] [-T TOP] [-L] [-S] [-v]
[-m]
pid [interval]
Summarize method calls in high-level languages.
......@@ -71,7 +72,7 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-l {java,python,ruby,php}, --language {java,python,ruby,php}
-l {java,python,ruby,php,none}, --language {java,python,ruby,php,none}
language to trace (if none, trace syscalls only)
-T TOP, --top TOP number of most frequent/slow calls to print
-L, --latency record method latency from enter to exit (except
......
......@@ -13,21 +13,24 @@
from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
import ctypes as ct
import time
import os
languages = ["java", "python", "ruby", "php"]
examples = """examples:
./uflow java 185 # trace Java method calls in process 185
./uflow ruby 1344 # trace Ruby method calls in process 1344
./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
./uflow -l java 185 # trace Java method calls in process 185
./uflow -l ruby 134 # trace Ruby method calls in process 134
./uflow -M indexOf -l java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
"""
parser = argparse.ArgumentParser(
description="Trace method execution flow in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("language", choices=["java", "python", "ruby", "php"],
parser.add_argument("-l", "--language", choices=languages,
help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-M", "--method",
......@@ -113,21 +116,25 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return):
usdt = USDT(pid=args.pid)
if args.language == "java":
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
if language == "java":
enable_probe("method__entry", "java_entry",
"bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=False)
enable_probe("method__return", "java_return",
"bpf_usdt_readarg(2, ctx, &clazz);",
"bpf_usdt_readarg(4, ctx, &method);", is_return=True)
elif args.language == "python":
elif language == "python":
enable_probe("function__entry", "python_entry",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=False)
enable_probe("function__return", "python_return",
"bpf_usdt_readarg(1, ctx, &clazz);", # filename really
"bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "ruby":
elif language == "ruby":
enable_probe("method__entry", "ruby_entry",
"bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=False)
......@@ -140,13 +147,16 @@ elif args.language == "ruby":
enable_probe("cmethod__return", "ruby_creturn",
"bpf_usdt_readarg(1, ctx, &clazz);",
"bpf_usdt_readarg(2, ctx, &method);", is_return=True)
elif args.language == "php":
elif language == "php":
enable_probe("function__entry", "php_entry",
"bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=False)
enable_probe("function__return", "php_return",
"bpf_usdt_readarg(4, ctx, &clazz);",
"bpf_usdt_readarg(1, ctx, &method);", is_return=True)
else:
print("No language detected; use -l to trace a language.")
exit(1)
if args.verbose:
print(usdt.get_text())
......@@ -154,7 +164,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing method calls in %s process %d... Ctrl-C to quit." %
(args.language, args.pid))
(language, args.pid))
print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD"))
class CallEvent(ct.Structure):
......
......@@ -10,7 +10,7 @@ method invocations.
For example, trace all Ruby method calls in a specific process:
# ./uflow ruby 27245
# ./uflow -l ruby 27245
Tracing method calls in ruby process 27245... Ctrl-C to quit.
CPU PID TID TIME(us) METHOD
3 27245 27245 4.536 <- IO.gets
......@@ -34,7 +34,7 @@ and the <- and -> arrows indicate the direction of the event (exit or entry).
Often, the amount of output can be overwhelming. You can filter specific
classes or methods. For example, trace only methods from the Thread class:
# ./uflow -C java/lang/Thread java $(pidof java)
# ./uflow -C java/lang/Thread $(pidof java)
Tracing method calls in java process 27722... Ctrl-C to quit.
CPU PID TID TIME(us) METHOD
3 27722 27731 3.144 -> java/lang/Thread.<init>
......@@ -88,17 +88,18 @@ thread running on the same CPU.
USAGE message:
# ./uflow -h
usage: uflow.py [-h] [-M METHOD] [-C CLAZZ] [-v] {java,python,ruby,php} pid
usage: uflow.py [-h] [-l {java,python,ruby,php}] [-M METHOD] [-C CLAZZ] [-v]
pid
Trace method execution flow in high-level languages.
positional arguments:
{java,python,ruby,php}
language to trace
pid process id to attach to
optional arguments:
-h, --help show this help message and exit
-l {java,python,ruby,php}, --language {java,python,ruby,php}
language to trace
-M METHOD, --method METHOD
trace only calls to methods starting with this prefix
-C CLAZZ, --class CLAZZ
......@@ -107,7 +108,7 @@ optional arguments:
purposes)
examples:
./uflow java 185 # trace Java method calls in process 185
./uflow ruby 1344 # trace Ruby method calls in process 1344
./uflow -M indexOf java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
./uflow -l java 185 # trace Java method calls in process 185
./uflow -l ruby 134 # trace Ruby method calls in process 134
./uflow -M indexOf -l java 185 # trace only 'indexOf'-prefixed methods
./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
......@@ -13,20 +13,23 @@
from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
import ctypes as ct
import time
import os
languages = ["java", "python", "ruby", "node"]
examples = """examples:
./ugc java 185 # trace Java GCs in process 185
./ugc ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 java 185 # trace only Java GCs longer than 10ms
./ugc -l java 185 # trace Java GCs in process 185
./ugc -l ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 -l java 185 # trace only Java GCs longer than 10ms
"""
parser = argparse.ArgumentParser(
description="Summarize garbage collection events in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("language", choices=["java", "python", "ruby", "node"],
parser.add_argument("-l", "--language", choices=languages,
help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-v", "--verbose", action="store_true",
......@@ -111,10 +114,14 @@ int trace_%s(struct pt_regs *ctx) {
probes = []
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
#
# Java
#
if args.language == "java":
if language == "java":
# Oddly, the gc__begin/gc__end probes don't really have any useful
# information, while the mem__pool* ones do. There's also a bunch of
# probes described in the hotspot_gc*.stp file which aren't there
......@@ -145,7 +152,7 @@ if args.language == "java":
#
# Python
#
elif args.language == "python":
elif language == "python":
begin_save = """
int gen = 0;
bpf_usdt_readarg(1, ctx, &gen);
......@@ -166,7 +173,7 @@ elif args.language == "python":
#
# Ruby
#
elif args.language == "ruby":
elif language == "ruby":
# Ruby GC probes do not have any additional information available.
probes.append(Probe("gc__mark__begin", "gc__mark__end",
"", "", lambda _: "GC mark stage"))
......@@ -175,7 +182,7 @@ elif args.language == "ruby":
#
# Node
#
elif args.language == "node":
elif language == "node":
end_save = """
u32 gc_type = 0;
bpf_usdt_readarg(1, ctx, &gc_type);
......@@ -188,6 +195,11 @@ elif args.language == "node":
[desc for desc, val in descs.items()
if e.field1 & val != 0])))
else:
print("No language detected; use -l to trace a language.")
exit(1)
for probe in probes:
program += probe.generate()
probe.attach()
......@@ -198,7 +210,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing garbage collections in %s process %d... Ctrl-C to quit." %
(args.language, args.pid))
(language, args.pid))
time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
print("%-8s %-8s %-40s" % ("START", time_col, "DESCRIPTION"))
......
......@@ -8,7 +8,7 @@ the GC event is also provided.
For example, to trace all garbage collection events in a specific Node process:
# ugc node $(pidof node)
# ugc $(pidof node)
Tracing garbage collections in node process 30012... Ctrl-C to quit.
START TIME (us) DESCRIPTION
1.500 1181.00 GC scavenge
......@@ -44,7 +44,7 @@ Occasionally, it might be useful to filter out collections that are very short,
or display only collections that have a specific description. The -M and -F
switches can be useful for this:
# ugc -F Tenured java $(pidof java)
# ugc -F Tenured $(pidof java)
Tracing garbage collections in java process 29907... Ctrl-C to quit.
START TIME (us) DESCRIPTION
0.360 4309.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
......@@ -52,7 +52,7 @@ START TIME (us) DESCRIPTION
4.648 4139.00 MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
^C
# ugc -M 1 java $(pidof java)
# ugc -M 1 $(pidof java)
Tracing garbage collections in java process 29907... Ctrl-C to quit.
START TIME (us) DESCRIPTION
0.160 3715.00 MarkSweepCompact Code Cache used=287528->3209472 max=173408256->251658240
......@@ -68,18 +68,19 @@ START TIME (us) DESCRIPTION
USAGE message:
# ugc -h
usage: ugc.py [-h] [-v] [-m] [-M MINIMUM] [-F FILTER]
{java,python,ruby,node} pid
usage: ugc.py [-h] [-l {java,python,ruby,node}] [-v] [-m] [-M MINIMUM]
[-F FILTER]
pid
Summarize garbage collection events in high-level languages.
positional arguments:
{java,python,ruby,node}
language to trace
pid process id to attach to
optional arguments:
-h, --help show this help message and exit
-l {java,python,ruby,node}, --language {java,python,ruby,node}
language to trace
-v, --verbose verbose mode: print the BPF program (for debugging
purposes)
-m, --milliseconds report times in milliseconds (default is microseconds)
......@@ -89,6 +90,6 @@ optional arguments:
display only GCs whose description contains this text
examples:
./ugc java 185 # trace Java GCs in process 185
./ugc ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 java 185 # trace only Java GCs longer than 10ms
./ugc -l java 185 # trace Java GCs in process 185
./ugc -l ruby 1344 -m # trace Ruby GCs reporting in ms
./ugc -M 10 -l java 185 # trace only Java GCs longer than 10ms
......@@ -13,20 +13,24 @@
from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
from time import sleep
import os
# C needs to be the last language.
languages = ["java", "ruby", "c"]
examples = """examples:
./uobjnew java 145 # summarize Java allocations in process 145
./uobjnew c 2020 1 # grab malloc() sizes and print every second
./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size
./uobjnew -l java 145 # summarize Java allocations in process 145
./uobjnew -l c 2020 1 # grab malloc() sizes and print every second
./uobjnew -l ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew -l ruby 6712 -S 10 # top 10 Ruby types by total size
"""
parser = argparse.ArgumentParser(
description="Summarize object allocations in high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("language", choices=["java", "ruby", "c"],
parser.add_argument("-l", "--language", choices=languages,
help="language to trace")
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("interval", type=int, nargs='?',
......@@ -39,6 +43,10 @@ parser.add_argument("-v", "--verbose", action="store_true",
help="verbose mode: print the BPF program (for debugging purposes)")
args = parser.parse_args()
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
program = """
#include <linux/ptrace.h>
......@@ -56,14 +64,14 @@ struct val_t {
};
BPF_HASH(allocs, struct key_t, struct val_t);
""".replace("MALLOC_TRACING", "1" if args.language == "c" else "0")
""".replace("MALLOC_TRACING", "1" if language == "c" else "0")
usdt = USDT(pid=args.pid)
#
# Java
#
if args.language == "java":
if language == "java":
program += """
int alloc_entry(struct pt_regs *ctx) {
struct key_t key = {};
......@@ -82,7 +90,7 @@ int alloc_entry(struct pt_regs *ctx) {
#
# Ruby
#
elif args.language == "ruby":
elif language == "ruby":
create_template = """
int THETHING_alloc_entry(struct pt_regs *ctx) {
struct key_t key = { .name = "THETHING" };
......@@ -115,7 +123,7 @@ int object_alloc_entry(struct pt_regs *ctx) {
#
# C
#
elif args.language == "c":
elif language == "c":
program += """
int alloc_entry(struct pt_regs *ctx, size_t size) {
struct key_t key = {};
......@@ -128,18 +136,23 @@ int alloc_entry(struct pt_regs *ctx, size_t size) {
}
"""
else:
print("No language detected; use -l to trace a language.")
exit(1)
if args.verbose:
print(usdt.get_text())
print(program)
bpf = BPF(text=program, usdt_contexts=[usdt])
if args.language == "c":
if language == "c":
bpf.attach_uprobe(name="c", sym="malloc", fn_name="alloc_entry",
pid=args.pid)
exit_signaled = False
print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none"))
(args.pid, language or "none"))
while True:
try:
sleep(args.interval or 99999999)
......@@ -157,7 +170,7 @@ while True:
data = sorted(data.items(), key=lambda kv: kv[1].total_size)
print("%-30s %8s %12s" % ("TYPE", "# ALLOCS", "# BYTES"))
for key, value in data:
if args.language == "c":
if language == "c":
obj_type = "block size %d" % key.size
else:
obj_type = key.name
......
......@@ -9,7 +9,7 @@ can in turn cause heavy garbage collection.
For example, trace Ruby object allocations when running some simple commands
in irb (the Ruby REPL):
# ./uobjnew ruby 27245
# ./uobjnew -l ruby 27245
Tracing allocations in process 27245 (language: ruby)... Ctrl-C to quit.
TYPE # ALLOCS # BYTES
......@@ -28,7 +28,7 @@ Plain C/C++ allocations (through "malloc") are also supported. We can't report
the type being allocated, but we can report the object sizes at least. Also,
print only the top 10 rows by number of bytes allocated:
# ./uobjnew -S 10 c 27245
# ./uobjnew -S 10 -l c 27245
Tracing allocations in process 27245 (language: c)... Ctrl-C to quit.
TYPE # ALLOCS # BYTES
......@@ -48,18 +48,19 @@ block size 80 569 45520
USAGE message:
# ./uobjnew -h
usage: uobjnew.py [-h] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
{java,ruby,c} pid [interval]
usage: uobjnew.py [-h] [-l {java,ruby,c}] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
pid [interval]
Summarize object allocations in high-level languages.
positional arguments:
{java,ruby,c} language to trace
pid process id to attach to
interval print every specified number of seconds
optional arguments:
-h, --help show this help message and exit
-l {java,ruby,c}, --language {java,ruby,c}
language to trace
-C TOP_COUNT, --top-count TOP_COUNT
number of most frequently allocated types to print
-S TOP_SIZE, --top-size TOP_SIZE
......@@ -68,7 +69,7 @@ optional arguments:
purposes)
examples:
./uobjnew java 145 # summarize Java allocations in process 145
./uobjnew c 2020 1 # grab malloc() sizes and print every second
./uobjnew ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew ruby 6712 -S 10 # top 10 Ruby types by total size
./uobjnew -l java 145 # summarize Java allocations in process 145
./uobjnew -l c 2020 1 # grab malloc() sizes and print every second
./uobjnew -l ruby 6712 -C 10 # top 10 Ruby types by number of allocations
./uobjnew -l ruby 6712 -S 10 # top 10 Ruby types by total size
......@@ -13,9 +13,12 @@
from __future__ import print_function
import argparse
from bcc import BPF, USDT
from bcc import BPF, USDT, utils
import ctypes as ct
import time
import os
languages = ["java"]
examples = """examples:
./uthreads -l java 185 # trace Java threads in process 185
......@@ -26,7 +29,7 @@ parser = argparse.ArgumentParser(
"high-level languages.",
formatter_class=argparse.RawDescriptionHelpFormatter,
epilog=examples)
parser.add_argument("-l", "--language", choices=["java"],
parser.add_argument("-l", "--language", choices=languages + ["none"],
help="language to trace (none for pthreads only)")
parser.add_argument("pid", type=int, help="process id to attach to")
parser.add_argument("-v", "--verbose", action="store_true",
......@@ -59,7 +62,11 @@ int trace_pthread(struct pt_regs *ctx) {
"""
usdt.enable_probe_or_bail("pthread_start", "trace_pthread")
if args.language == "java":
language = args.language
if not language:
language = utils.detect_language(languages, args.pid)
if language == "java":
template = """
int %s(struct pt_regs *ctx) {
char type[] = "%s";
......@@ -87,7 +94,7 @@ if args.verbose:
bpf = BPF(text=program, usdt_contexts=[usdt])
print("Tracing thread events in process %d (language: %s)... Ctrl-C to quit." %
(args.pid, args.language or "none"))
(args.pid, language or "none"))
print("%-8s %-16s %-8s %-30s" % ("TIME", "ID", "TYPE", "DESCRIPTION"))
class ThreadEvent(ct.Structure):
......
......@@ -39,7 +39,7 @@ instead.
USAGE message:
# ./uthreads -h
usage: uthreads.py [-h] [-l {java}] [-v] pid
usage: uthreads.py [-h] [-l {java,none}] [-v] pid
Trace thread creation/destruction events in high-level languages.
......@@ -48,7 +48,7 @@ positional arguments:
optional arguments:
-h, --help show this help message and exit
-l {java}, --language {java}
-l {java,none}, --language {java,none}
language to trace (none for pthreads only)
-v, --verbose verbose mode: print the BPF program (for debugging
purposes)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment