u* tools: automatically detect the language (#1067)

* cc: bcc function to detect the language of a process.
  bcc_procutils_language looks into /proc/$pid/cmdline, /proc/$pid/exe, and /proc/$pid/maps to determine the language. The Python wrapper takes a list of candidate languages; if the detected language is not part of the list, None is returned.
* u* tools: automatically detect the language.
  Uses the detect_language bcc helper. The -l switch can override the detected language. In uthreads and ucalls, the language can be set to 'none' to trace pthreads and syscalls respectively. All tools use the -l switch to set the language, for consistency.

u* tools: automatically detect the language (#1067)
* cc: bcc function to detect the language of a process.
  bcc_procutils_language looks into /proc/$pid/cmdline, /proc/$pid/exe, and /proc/$pid/maps to determine the language. The Python wrapper takes a list of candidate languages; if the detected language is not part of the list, None is returned.
* u* tools: automatically detect the language.
  Uses the detect_language bcc helper. The -l switch can override the detected language. In uthreads and ucalls, the language can be set to 'none' to trace pthreads and syscalls respectively. All tools use the -l switch to set the language, for consistency.
4bb6d7fe · Paul Chaignon · Sasha Goldshtein · 00f662db · 4bb6d7fe · 4bb6d7fe
Commit 4bb6d7fe authored Mar 30, 2017 by Paul Chaignon Committed by Sasha Goldshtein Mar 30, 2017
17 changed files
--- a/src/cc/bcc_proc.c
+++ b/src/cc/bcc_proc.c
@@ -485,3 +485,78 @@ bool bcc_procutils_exit_mountns(struct ns_cookie *nc) {

  return rc;
 }
+
+/*
+ * Language names recognised by bcc_procutils_language(). "c" is kept apart
+ * because it is only reported as a fallback when libc alone is mapped.
+ */
+const char *languages[] = {"java", "python", "ruby", "php", "node"};
+const char *language_c = "c";
+const int nb_languages = 5;
+
+/*
+ * Guess the language runtime of process `pid`.
+ *
+ * The resolved /proc/<pid>/exe path is searched for a language name first;
+ * failing that, every /proc/<pid>/maps entry is searched for "/lib<name>".
+ * Returns a static string that must not be freed: an entry of languages[],
+ * "c" when only libc was seen, or NULL when nothing matched or the maps file
+ * could not be opened.
+ */
+const char *bcc_procutils_language(int pid) {
+  char procfilename[32], line[4096], pathname[32], *str;
+  const char *found = NULL;
+  FILE *procfile;
+  int i, ret;
+
+  /* Look for clues in the absolute path to the executable. */
+  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/exe", (long)pid);
+  if (realpath(procfilename, line)) {
+    for (i = 0; i < nb_languages; i++)
+      if (strstr(line, languages[i]))
+        return languages[i];
+  }
+
+  snprintf(procfilename, sizeof(procfilename), "/proc/%ld/maps", (long)pid);
+  procfile = fopen(procfilename, "r");
+  if (!procfile)
+    return NULL;
+
+  /* Look for clues in memory mappings. */
+  bool libc = false;
+  do {
+    char perm[8], dev[16];
+    unsigned long long begin, end, size;
+    long long inode;
+    /* Explicit widths keep perm and dev inside their buffers. */
+    ret = fscanf(procfile, "%llx-%llx %7s %llx %15s %lld", &begin, &end, perm,
+                 &size, dev, &inode);
+    /* Whatever is left on the line is the (possibly empty) mapping name. */
+    if (!fgets(line, sizeof(line), procfile))
+      break;
+    if (ret == 6) {
+      char *mapname = line;
+      char *newline = strchr(line, '\n');
+      if (newline)
+        newline[0] = '\0';
+      while (isspace((unsigned char)mapname[0])) mapname++;
+      for (i = 0; i < nb_languages && !found; i++) {
+        snprintf(pathname, sizeof(pathname), "/lib%s", languages[i]);
+        if (strstr(mapname, pathname))
+          found = languages[i];
+      }
+      /* The libc test does not depend on the language, so run it once. */
+      if ((str = strstr(mapname, "libc")) &&
+          (str[4] == '-' || str[4] == '.'))
+        libc = true;
+    }
+  } while (!found && ret && ret != EOF);
+
+  /* Single exit point so the maps stream is closed on every path. */
+  fclose(procfile);
+
+  if (found)
+    return found;
+  /* Return C as the language if libc was found and nothing else. */
+  return libc ? language_c : NULL;
+}
--- a/src/cc/bcc_proc.h
+++ b/src/cc/bcc_proc.h
@@ -41,6 +41,7 @@ int bcc_procutils_each_ksym(bcc_procutils_ksymcb callback, void *payload);
 void bcc_procutils_free(const char *ptr);
 bool bcc_procutils_enter_mountns(int pid, struct ns_cookie *nc);
 bool bcc_procutils_exit_mountns(struct ns_cookie *nc);
+const char *bcc_procutils_language(int pid);

 #ifdef __cplusplus
 }

--- a/src/python/bcc/libbcc.py
+++ b/src/python/bcc/libbcc.py
@@ -134,6 +134,8 @@ lib.bcc_procutils_which_so.restype = ct.POINTER(ct.c_char)
 lib.bcc_procutils_which_so.argtypes = [ct.c_char_p, ct.c_int]
 lib.bcc_procutils_free.restype = None
 lib.bcc_procutils_free.argtypes = [ct.c_void_p]
+lib.bcc_procutils_language.restype = ct.POINTER(ct.c_char)
+lib.bcc_procutils_language.argtypes = [ct.c_int]

 lib.bcc_resolve_symname.restype = ct.c_int
 lib.bcc_resolve_symname.argtypes = [

--- a/src/python/bcc/utils.py
+++ b/src/python/bcc/utils.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import ctypes as ct
+
+from .libbcc import lib

 def _read_cpu_range(path):
    cpus = []
@@ -31,3 +34,25 @@ def get_online_cpus():

 def get_possible_cpus():
    return _read_cpu_range('/sys/devices/system/cpu/possible')
+
+def detect_language(candidates, pid):
+    """Return the language detected for process `pid`, or None.
+
+    Calls the bcc_procutils_language helper of libbcc and decodes its result.
+
+    Args:
+        candidates: collection of language names the caller accepts.
+        pid: id of the process to inspect.
+
+    Returns:
+        The detected language as a str if it is one of `candidates`; None
+        if the helper reported nothing or a language not in `candidates`.
+    """
+    res = lib.bcc_procutils_language(pid)
+    # A NULL result from the C helper surfaces as None; decoding it
+    # unconditionally would raise AttributeError instead of returning None.
+    language = ct.cast(res, ct.c_char_p).value
+    if language is None:
+        return None
+    language = language.decode()
+    return language if language in candidates else None
--- a/tests/cc/test_c_api.cc
+++ b/tests/cc/test_c_api.cc
@@ -36,6 +36,12 @@ using namespace std;

 static pid_t spawn_child(void *, bool, bool, int (*)(void *));

+TEST_CASE("language detection", "[c_api]") {
+  const char *c = bcc_procutils_language(getpid());
+  REQUIRE(c);
+  REQUIRE(string(c).compare("c") == 0);
+}
+
 TEST_CASE("shared object resolution", "[c_api]") {
  char *libm = bcc_procutils_which_so("m", 0);
  REQUIRE(libm);

--- a/tests/python/test_tools_smoke.py
+++ b/tests/python/test_tools_smoke.py
@@ -311,14 +311,14 @@ class SmokeTests(TestCase):
    def test_ucalls(self):
        # This attaches a large number (300+) kprobes, which can be slow,
        # so use an increased timeout value.
-        self.run_with_int("ucalls.py -S %d" % os.getpid(),
+        self.run_with_int("ucalls.py -l none -S %d" % os.getpid(),
                          timeout=30, kill_timeout=30)

    @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
    def test_uflow(self):
        # The Python installed on the Ubuntu buildbot doesn't have USDT
        # probes, so we can't run uflow.
-        # self.run_with_int("uflow.py python %d" % os.getpid())
+        # self.run_with_int("uflow.py -l python %d" % os.getpid())
        pass

    @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
@@ -329,7 +329,7 @@ class SmokeTests(TestCase):

    @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
    def test_uobjnew(self):
-        self.run_with_int("uobjnew.py c %d" % os.getpid())
+        self.run_with_int("uobjnew.py -l c %d" % os.getpid())

    @skipUnless(kernel_version_ge(4,4), "requires kernel >= 4.4")
    def test_ustat(self):

--- a/tests/python/test_utils.py
+++ b/tests/python/test_utils.py
@@ -2,9 +2,10 @@
 # Copyright (c) Catalysts GmbH
 # Licensed under the Apache License, Version 2.0 (the "License")

-from bcc.utils import get_online_cpus
+from bcc.utils import get_online_cpus, detect_language
 import multiprocessing
 import unittest
+import os

 class TestUtils(unittest.TestCase):
    def test_get_online_cpus(self):
@@ -13,6 +14,10 @@ class TestUtils(unittest.TestCase):

        self.assertEqual(len(online_cpus), num_cores)

+    def test_detect_language(self):
+        candidates = ["java", "ruby", "php", "node", "c", "python"]
+        language = detect_language(candidates, os.getpid())
+        self.assertEqual(language, "python")

 if __name__ == "__main__":
    unittest.main()
--- a/tools/ucalls.py
+++ b/tools/ucalls.py
@@ -14,8 +14,11 @@

 from __future__ import print_function
 import argparse
-from bcc import BPF, USDT
+from bcc import BPF, USDT, utils
 from time import sleep
+import os
+
+languages = ["java", "python", "ruby", "php"]

 examples = """examples:
    ./ucalls -l java 185        # trace Java calls and print statistics on ^C
@@ -34,8 +37,7 @@ parser = argparse.ArgumentParser(
 parser.add_argument("pid", type=int, help="process id to attach to")
 parser.add_argument("interval", type=int, nargs='?',
    help="print every specified number of seconds")
-parser.add_argument("-l", "--language",
-    choices=["java", "python", "ruby", "php"],
+parser.add_argument("-l", "--language", choices=languages + ["none"],
    help="language to trace (if none, trace syscalls only)")
 parser.add_argument("-T", "--top", type=int,
    help="number of most frequent/slow calls to print")
@@ -49,10 +51,14 @@ parser.add_argument("-m", "--milliseconds", action="store_true",
    help="report times in milliseconds (default is microseconds)")
 args = parser.parse_args()

+language = args.language
+if not language:
+    language = utils.detect_language(languages, args.pid)
+
 # We assume that the entry and return probes have the same arguments. This is
 # the case for Java, Python, Ruby, and PHP. If there's a language where it's
 # not the case, we will need to build a custom correlator from entry to exit.
-if args.language == "java":
+if language == "java":
    # TODO for JVM entries, we actually have the real length of the class
    #      and method strings in arg3 and arg5 respectively, so we can insert
    #      the null terminator in its proper position.
@@ -60,27 +66,29 @@ if args.language == "java":
    return_probe = "method__return"
    read_class = "bpf_usdt_readarg(2, ctx, &clazz);"
    read_method = "bpf_usdt_readarg(4, ctx, &method);"
-elif args.language == "python":
+elif language == "python":
    entry_probe = "function__entry"
    return_probe = "function__return"
    read_class = "bpf_usdt_readarg(1, ctx, &clazz);"    # filename really
    read_method = "bpf_usdt_readarg(2, ctx, &method);"
-elif args.language == "ruby":
+elif language == "ruby":
    # TODO Also probe cmethod__entry and cmethod__return with same arguments
    entry_probe = "method__entry"
    return_probe = "method__return"
    read_class = "bpf_usdt_readarg(1, ctx, &clazz);"
    read_method = "bpf_usdt_readarg(2, ctx, &method);"
-elif args.language == "php":
+elif language == "php":
    entry_probe = "function__entry"
    return_probe = "function__return"
    read_class = "bpf_usdt_readarg(4, ctx, &clazz);"
    read_method = "bpf_usdt_readarg(1, ctx, &method);"
-elif not args.language:
+elif not language or language == "none":
    if not args.syscalls:
        print("Nothing to do; use -S to trace syscalls.")
        exit(1)
    entry_probe, return_probe, read_class, read_method = ("", "", "", "")
+    if language:
+        language = None

 program = """
 #include <linux/ptrace.h>
@@ -213,11 +221,11 @@ int syscall_return(struct pt_regs *ctx) {
 """.replace("READ_CLASS", read_class) \
   .replace("READ_METHOD", read_method) \
   .replace("PID_FILTER", "if ((pid >> 32) != %d) { return 0; }" % args.pid) \
-   .replace("DEFINE_NOLANG", "#define NOLANG" if not args.language else "") \
+   .replace("DEFINE_NOLANG", "#define NOLANG" if not language else "") \
   .replace("DEFINE_LATENCY", "#define LATENCY" if args.latency else "") \
   .replace("DEFINE_SYSCALLS", "#define SYSCALLS" if args.syscalls else "")

-if args.language:
+if language:
    usdt = USDT(pid=args.pid)
    usdt.enable_probe_or_bail(entry_probe, "trace_entry")
    if args.latency:
@@ -278,7 +286,7 @@ def clear_data():

 exit_signaled = False
 print("Tracing calls in process %d (language: %s)... Ctrl-C to quit." %
-      (args.pid, args.language or "none"))
+      (args.pid, language or "none"))
 while True:
    try:
        sleep(args.interval or 99999999)

--- a/tools/ucalls_example.txt
+++ b/tools/ucalls_example.txt
@@ -12,7 +12,7 @@ argdist, biotop, fileslower, and others.

 For example, to trace method call latency in a Java application:

-# ucalls -L -l java $(pidof java)
+# ucalls -L $(pidof java)
 Tracing calls in process 26877 (language: java)... Ctrl-C to quit.

 METHOD                                              # CALLS TIME (us)
@@ -48,7 +48,7 @@ Detaching kernel probes, please wait...
 To print only the top 5 methods and report times in milliseconds (the default
 is microseconds):

-# ucalls -l python -mT 5 $(pidof python)
+# ucalls -mT 5 $(pidof python)
 Tracing calls in process 26914 (language: python)... Ctrl-C to quit.

 METHOD                                              # CALLS
@@ -60,7 +60,8 @@ METHOD                                              # CALLS
 USAGE message:

 # ./ucalls.py -h
-usage: ucalls.py [-h] [-l {java,python,ruby,php}] [-T TOP] [-L] [-S] [-v] [-m]
+usage: ucalls.py [-h] [-l {java,python,ruby,php,none}] [-T TOP] [-L] [-S] [-v]
+                 [-m]
                 pid [interval]

 Summarize method calls in high-level languages.
@@ -71,7 +72,7 @@ positional arguments:

 optional arguments:
  -h, --help            show this help message and exit
-  -l {java,python,ruby,php}, --language {java,python,ruby,php}
+  -l {java,python,ruby,php,none}, --language {java,python,ruby,php,none}
                        language to trace (if none, trace syscalls only)
  -T TOP, --top TOP     number of most frequent/slow calls to print
  -L, --latency         record method latency from enter to exit (except

--- a/tools/uflow.py
+++ b/tools/uflow.py
@@ -13,21 +13,24 @@

 from __future__ import print_function
 import argparse
-from bcc import BPF, USDT
+from bcc import BPF, USDT, utils
 import ctypes as ct
 import time
+import os
+
+languages = ["java", "python", "ruby", "php"]

 examples = """examples:
-    ./uflow java 185                # trace Java method calls in process 185
-    ./uflow ruby 1344               # trace Ruby method calls in process 1344
-    ./uflow -M indexOf java 185     # trace only 'indexOf'-prefixed methods
-    ./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
+    ./uflow -l java 185                # trace Java method calls in process 185
+    ./uflow -l ruby 134                # trace Ruby method calls in process 134
+    ./uflow -M indexOf -l java 185     # trace only 'indexOf'-prefixed methods
+    ./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
 """
 parser = argparse.ArgumentParser(
    description="Trace method execution flow in high-level languages.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
-parser.add_argument("language", choices=["java", "python", "ruby", "php"],
+parser.add_argument("-l", "--language", choices=languages,
    help="language to trace")
 parser.add_argument("pid", type=int, help="process id to attach to")
 parser.add_argument("-M", "--method",
@@ -113,21 +116,25 @@ def enable_probe(probe_name, func_name, read_class, read_method, is_return):

 usdt = USDT(pid=args.pid)

-if args.language == "java":
+language = args.language
+if not language:
+    language = utils.detect_language(languages, args.pid)
+
+if language == "java":
    enable_probe("method__entry", "java_entry",
                 "bpf_usdt_readarg(2, ctx, &clazz);",
                 "bpf_usdt_readarg(4, ctx, &method);", is_return=False)
    enable_probe("method__return", "java_return",
                 "bpf_usdt_readarg(2, ctx, &clazz);",
                 "bpf_usdt_readarg(4, ctx, &method);", is_return=True)
-elif args.language == "python":
+elif language == "python":
    enable_probe("function__entry", "python_entry",
                 "bpf_usdt_readarg(1, ctx, &clazz);",   # filename really
                 "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
    enable_probe("function__return", "python_return",
                 "bpf_usdt_readarg(1, ctx, &clazz);",   # filename really
                 "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
-elif args.language == "ruby":
+elif language == "ruby":
    enable_probe("method__entry", "ruby_entry",
                 "bpf_usdt_readarg(1, ctx, &clazz);",
                 "bpf_usdt_readarg(2, ctx, &method);", is_return=False)
@@ -140,13 +147,16 @@ elif args.language == "ruby":
    enable_probe("cmethod__return", "ruby_creturn",
                 "bpf_usdt_readarg(1, ctx, &clazz);",
                 "bpf_usdt_readarg(2, ctx, &method);", is_return=True)
-elif args.language == "php":
+elif language == "php":
    enable_probe("function__entry", "php_entry",
                 "bpf_usdt_readarg(4, ctx, &clazz);",
                 "bpf_usdt_readarg(1, ctx, &method);", is_return=False)
    enable_probe("function__return", "php_return",
                 "bpf_usdt_readarg(4, ctx, &clazz);",
                 "bpf_usdt_readarg(1, ctx, &method);", is_return=True)
+else:
+    print("No language detected; use -l to trace a language.")
+    exit(1)

 if args.verbose:
    print(usdt.get_text())
@@ -154,7 +164,7 @@ if args.verbose:

 bpf = BPF(text=program, usdt_contexts=[usdt])
 print("Tracing method calls in %s process %d... Ctrl-C to quit." %
-      (args.language, args.pid))
+      (language, args.pid))
 print("%-3s %-6s %-6s %-8s %s" % ("CPU", "PID", "TID", "TIME(us)", "METHOD"))

 class CallEvent(ct.Structure):

--- a/tools/uflow_example.txt
+++ b/tools/uflow_example.txt
@@ -10,7 +10,7 @@ method invocations.

 For example, trace all Ruby method calls in a specific process:

-# ./uflow ruby 27245
+# ./uflow -l ruby 27245
 Tracing method calls in ruby process 27245... Ctrl-C to quit.
 CPU PID    TID    TIME(us) METHOD
 3   27245  27245  4.536    <- IO.gets                              
@@ -34,7 +34,7 @@ and the <- and -> arrows indicate the direction of the event (exit or entry).
 Often, the amount of output can be overwhelming. You can filter specific 
 classes or methods. For example, trace only methods from the Thread class:

-# ./uflow -C java/lang/Thread java $(pidof java)
+# ./uflow -C java/lang/Thread $(pidof java)
 Tracing method calls in java process 27722... Ctrl-C to quit.
 CPU PID    TID    TIME(us) METHOD
 3   27722  27731  3.144    -> java/lang/Thread.<init>              
@@ -88,17 +88,18 @@ thread running on the same CPU.
 USAGE message:

 # ./uflow -h
-usage: uflow.py [-h] [-M METHOD] [-C CLAZZ] [-v] {java,python,ruby,php} pid
+usage: uflow.py [-h] [-l {java,python,ruby,php}] [-M METHOD] [-C CLAZZ] [-v]
+                pid

 Trace method execution flow in high-level languages.

 positional arguments:
-  {java,python,ruby,php}
-			language to trace
  pid                   process id to attach to

 optional arguments:
  -h, --help            show this help message and exit
+  -l {java,python,ruby,php}, --language {java,python,ruby,php}
+                        language to trace
  -M METHOD, --method METHOD
                        trace only calls to methods starting with this prefix
  -C CLAZZ, --class CLAZZ
@@ -107,7 +108,7 @@ optional arguments:
                        purposes)

 examples:
-    ./uflow java 185                # trace Java method calls in process 185
-    ./uflow ruby 1344               # trace Ruby method calls in process 1344
-    ./uflow -M indexOf java 185     # trace only 'indexOf'-prefixed methods
-    ./uflow -C '<stdin>' python 180 # trace only REPL-defined methods
+    ./uflow -l java 185                # trace Java method calls in process 185
+    ./uflow -l ruby 134                # trace Ruby method calls in process 134
+    ./uflow -M indexOf -l java 185     # trace only 'indexOf'-prefixed methods
+    ./uflow -C '<stdin>' -l python 180 # trace only REPL-defined methods
--- a/tools/ugc.py
+++ b/tools/ugc.py
@@ -13,20 +13,23 @@

 from __future__ import print_function
 import argparse
-from bcc import BPF, USDT
+from bcc import BPF, USDT, utils
 import ctypes as ct
 import time
+import os
+
+languages = ["java", "python", "ruby", "node"]

 examples = """examples:
-    ./ugc java 185           # trace Java GCs in process 185
-    ./ugc ruby 1344 -m       # trace Ruby GCs reporting in ms
-    ./ugc -M 10 java 185     # trace only Java GCs longer than 10ms
+    ./ugc -l java 185        # trace Java GCs in process 185
+    ./ugc -l ruby 1344 -m    # trace Ruby GCs reporting in ms
+    ./ugc -M 10 -l java 185  # trace only Java GCs longer than 10ms
 """
 parser = argparse.ArgumentParser(
    description="Summarize garbage collection events in high-level languages.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
-parser.add_argument("language", choices=["java", "python", "ruby", "node"],
+parser.add_argument("-l", "--language", choices=languages,
    help="language to trace")
 parser.add_argument("pid", type=int, help="process id to attach to")
 parser.add_argument("-v", "--verbose", action="store_true",
@@ -111,10 +114,14 @@ int trace_%s(struct pt_regs *ctx) {

 probes = []

+language = args.language
+if not language:
+    language = utils.detect_language(languages, args.pid)
+
 #
 # Java
 #
-if args.language == "java":
+if language == "java":
    # Oddly, the gc__begin/gc__end probes don't really have any useful
    # information, while the mem__pool* ones do. There's also a bunch of
    # probes described in the hotspot_gc*.stp file which aren't there
@@ -145,7 +152,7 @@ if args.language == "java":
 #
 # Python
 #
-elif args.language == "python":
+elif language == "python":
    begin_save = """
    int gen = 0;
    bpf_usdt_readarg(1, ctx, &gen);
@@ -166,7 +173,7 @@ elif args.language == "python":
 #
 # Ruby
 #
-elif args.language == "ruby":
+elif language == "ruby":
    # Ruby GC probes do not have any additional information available.
    probes.append(Probe("gc__mark__begin", "gc__mark__end",
                        "", "", lambda _: "GC mark stage"))
@@ -175,7 +182,7 @@ elif args.language == "ruby":
 #
 # Node
 #
-elif args.language == "node":
+elif language == "node":
    end_save = """
    u32 gc_type = 0;
    bpf_usdt_readarg(1, ctx, &gc_type);
@@ -188,6 +195,11 @@ elif args.language == "node":
                                     [desc for desc, val in descs.items()
                                      if e.field1 & val != 0])))

+else:
+    print("No language detected; use -l to trace a language.")
+    exit(1)
+
+
 for probe in probes:
    program += probe.generate()
    probe.attach()
@@ -198,7 +210,7 @@ if args.verbose:

 bpf = BPF(text=program, usdt_contexts=[usdt])
 print("Tracing garbage collections in %s process %d... Ctrl-C to quit." %
-      (args.language, args.pid))
+      (language, args.pid))
 time_col = "TIME (ms)" if args.milliseconds else "TIME (us)"
 print("%-8s %-8s %-40s" % ("START", time_col, "DESCRIPTION"))


--- a/tools/ugc_example.txt
+++ b/tools/ugc_example.txt
@@ -8,7 +8,7 @@ the GC event is also provided.

 For example, to trace all garbage collection events in a specific Node process:

-# ugc node $(pidof node)
+# ugc $(pidof node)
 Tracing garbage collections in node process 30012... Ctrl-C to quit.
 START    TIME (us) DESCRIPTION                             
 1.500    1181.00  GC scavenge
@@ -44,7 +44,7 @@ Occasionally, it might be useful to filter out collections that are very short,
 or display only collections that have a specific description. The -M and -F
 switches can be useful for this:

-# ugc -F Tenured java $(pidof java)
+# ugc -F Tenured $(pidof java)
 Tracing garbage collections in java process 29907... Ctrl-C to quit.
 START    TIME (us) DESCRIPTION                             
 0.360    4309.00  MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
@@ -52,7 +52,7 @@ START    TIME (us) DESCRIPTION
 4.648    4139.00  MarkSweepCompact Tenured Gen used=287528->287528 max=173408256->173408256
 ^C

-# ugc -M 1 java $(pidof java)
+# ugc -M 1 $(pidof java)
 Tracing garbage collections in java process 29907... Ctrl-C to quit.
 START    TIME (us) DESCRIPTION                             
 0.160    3715.00  MarkSweepCompact Code Cache used=287528->3209472 max=173408256->251658240
@@ -68,18 +68,19 @@ START    TIME (us) DESCRIPTION
 USAGE message:

 # ugc -h
-usage: ugc.py [-h] [-v] [-m] [-M MINIMUM] [-F FILTER]
-              {java,python,ruby,node} pid
+usage: ugc.py [-h] [-l {java,python,ruby,node}] [-v] [-m] [-M MINIMUM]
+              [-F FILTER]
+              pid

 Summarize garbage collection events in high-level languages.

 positional arguments:
-  {java,python,ruby,node}
-                        language to trace
  pid                   process id to attach to

 optional arguments:
  -h, --help            show this help message and exit
+  -l {java,python,ruby,node}, --language {java,python,ruby,node}
+                        language to trace
  -v, --verbose         verbose mode: print the BPF program (for debugging
                        purposes)
  -m, --milliseconds    report times in milliseconds (default is microseconds)
@@ -89,6 +90,6 @@ optional arguments:
                        display only GCs whose description contains this text

 examples:
-    ./ugc java 185           # trace Java GCs in process 185
-    ./ugc ruby 1344 -m       # trace Ruby GCs reporting in ms
-    ./ugc -M 10 java 185     # trace only Java GCs longer than 10ms
+    ./ugc -l java 185        # trace Java GCs in process 185
+    ./ugc -l ruby 1344 -m    # trace Ruby GCs reporting in ms
+    ./ugc -M 10 -l java 185  # trace only Java GCs longer than 10ms
--- a/tools/uobjnew.py
+++ b/tools/uobjnew.py
@@ -13,20 +13,24 @@

 from __future__ import print_function
 import argparse
-from bcc import BPF, USDT
+from bcc import BPF, USDT, utils
 from time import sleep
+import os
+
+# C needs to be the last language.
+languages = ["java", "ruby", "c"]

 examples = """examples:
-    ./uobjnew java 145         # summarize Java allocations in process 145
-    ./uobjnew c 2020 1         # grab malloc() sizes and print every second
-    ./uobjnew ruby 6712 -C 10  # top 10 Ruby types by number of allocations
-    ./uobjnew ruby 6712 -S 10  # top 10 Ruby types by total size
+    ./uobjnew -l java 145         # summarize Java allocations in process 145
+    ./uobjnew -l c 2020 1         # grab malloc() sizes and print every second
+    ./uobjnew -l ruby 6712 -C 10  # top 10 Ruby types by number of allocations
+    ./uobjnew -l ruby 6712 -S 10  # top 10 Ruby types by total size
 """
 parser = argparse.ArgumentParser(
    description="Summarize object allocations in high-level languages.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
-parser.add_argument("language", choices=["java", "ruby", "c"],
+parser.add_argument("-l", "--language", choices=languages,
    help="language to trace")
 parser.add_argument("pid", type=int, help="process id to attach to")
 parser.add_argument("interval", type=int, nargs='?',
@@ -39,6 +43,10 @@ parser.add_argument("-v", "--verbose", action="store_true",
    help="verbose mode: print the BPF program (for debugging purposes)")
 args = parser.parse_args()

+language = args.language
+if not language:
+    language = utils.detect_language(languages, args.pid)
+
 program = """
 #include <linux/ptrace.h>

@@ -56,14 +64,14 @@ struct val_t {
 };

 BPF_HASH(allocs, struct key_t, struct val_t);
-""".replace("MALLOC_TRACING", "1" if args.language == "c" else "0")
+""".replace("MALLOC_TRACING", "1" if language == "c" else "0")

 usdt = USDT(pid=args.pid)

 #
 # Java
 #
-if args.language == "java":
+if language == "java":
    program += """
 int alloc_entry(struct pt_regs *ctx) {
    struct key_t key = {};
@@ -82,7 +90,7 @@ int alloc_entry(struct pt_regs *ctx) {
 #
 # Ruby
 #
-elif args.language == "ruby":
+elif language == "ruby":
    create_template = """
 int THETHING_alloc_entry(struct pt_regs *ctx) {
    struct key_t key = { .name = "THETHING" };
@@ -115,7 +123,7 @@ int object_alloc_entry(struct pt_regs *ctx) {
 #
 # C
 #
-elif args.language == "c":
+elif language == "c":
    program += """
 int alloc_entry(struct pt_regs *ctx, size_t size) {
    struct key_t key = {};
@@ -128,18 +136,23 @@ int alloc_entry(struct pt_regs *ctx, size_t size) {
 }
    """

+else:
+    print("No language detected; use -l to trace a language.")
+    exit(1)
+
+
 if args.verbose:
    print(usdt.get_text())
    print(program)

 bpf = BPF(text=program, usdt_contexts=[usdt])
-if args.language == "c":
+if language == "c":
    bpf.attach_uprobe(name="c", sym="malloc", fn_name="alloc_entry",
                      pid=args.pid)

 exit_signaled = False
 print("Tracing allocations in process %d (language: %s)... Ctrl-C to quit." %
-      (args.pid, args.language or "none"))
+      (args.pid, language or "none"))
 while True:
    try:
        sleep(args.interval or 99999999)
@@ -157,7 +170,7 @@ while True:
        data = sorted(data.items(), key=lambda kv: kv[1].total_size)
    print("%-30s %8s %12s" % ("TYPE", "# ALLOCS", "# BYTES"))
    for key, value in data:
-        if args.language == "c":
+        if language == "c":
            obj_type = "block size %d" % key.size
        else:
            obj_type = key.name

--- a/tools/uobjnew_example.txt
+++ b/tools/uobjnew_example.txt
@@ -9,7 +9,7 @@ can in turn cause heavy garbage collection.
 For example, trace Ruby object allocations when running some simple commands
 in irb (the Ruby REPL):

-# ./uobjnew ruby 27245
+# ./uobjnew -l ruby 27245
 Tracing allocations in process 27245 (language: ruby)... Ctrl-C to quit.

 TYPE                           # ALLOCS      # BYTES
@@ -28,7 +28,7 @@ Plain C/C++ allocations (through "malloc") are also supported. We can't report
 the type being allocated, but we can report the object sizes at least. Also,
 print only the top 10 rows by number of bytes allocated:

-# ./uobjnew -S 10 c 27245
+# ./uobjnew -S 10 -l c 27245
 Tracing allocations in process 27245 (language: c)... Ctrl-C to quit.

 TYPE                           # ALLOCS      # BYTES
@@ -48,18 +48,19 @@ block size 80                       569        45520
 USAGE message:

 # ./uobjnew -h
-usage: uobjnew.py [-h] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
-                  {java,ruby,c} pid [interval]
+usage: uobjnew.py [-h] [-l {java,ruby,c}] [-C TOP_COUNT] [-S TOP_SIZE] [-v]
+                  pid [interval]

 Summarize object allocations in high-level languages.

 positional arguments:
-  {java,ruby,c}         language to trace
  pid                   process id to attach to
  interval              print every specified number of seconds

 optional arguments:
  -h, --help            show this help message and exit
+  -l {java,ruby,c}, --language {java,ruby,c}
+                        language to trace
  -C TOP_COUNT, --top-count TOP_COUNT
                        number of most frequently allocated types to print
  -S TOP_SIZE, --top-size TOP_SIZE
@@ -68,7 +69,7 @@ optional arguments:
                        purposes)

 examples:
-    ./uobjnew java 145         # summarize Java allocations in process 145
-    ./uobjnew c 2020 1         # grab malloc() sizes and print every second
-    ./uobjnew ruby 6712 -C 10  # top 10 Ruby types by number of allocations
-    ./uobjnew ruby 6712 -S 10  # top 10 Ruby types by total size
+    ./uobjnew -l java 145         # summarize Java allocations in process 145
+    ./uobjnew -l c 2020 1         # grab malloc() sizes and print every second
+    ./uobjnew -l ruby 6712 -C 10  # top 10 Ruby types by number of allocations
+    ./uobjnew -l ruby 6712 -S 10  # top 10 Ruby types by total size
--- a/tools/uthreads.py
+++ b/tools/uthreads.py
@@ -13,9 +13,12 @@

 from __future__ import print_function
 import argparse
-from bcc import BPF, USDT
+from bcc import BPF, USDT, utils
 import ctypes as ct
 import time
+import os
+
+languages = ["java"]

 examples = """examples:
    ./uthreads -l java 185   # trace Java threads in process 185
@@ -26,7 +29,7 @@ parser = argparse.ArgumentParser(
                "high-level languages.",
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=examples)
-parser.add_argument("-l", "--language", choices=["java"],
+parser.add_argument("-l", "--language", choices=languages + ["none"],
    help="language to trace (none for pthreads only)")
 parser.add_argument("pid", type=int, help="process id to attach to")
 parser.add_argument("-v", "--verbose", action="store_true",
@@ -59,7 +62,11 @@ int trace_pthread(struct pt_regs *ctx) {
 """
 usdt.enable_probe_or_bail("pthread_start", "trace_pthread")

-if args.language == "java":
+language = args.language
+if not language:
+    language = utils.detect_language(languages, args.pid)
+
+if language == "java":
    template = """
 int %s(struct pt_regs *ctx) {
    char type[] = "%s";
@@ -87,7 +94,7 @@ if args.verbose:

 bpf = BPF(text=program, usdt_contexts=[usdt])
 print("Tracing thread events in process %d (language: %s)... Ctrl-C to quit." %
-      (args.pid, args.language or "none"))
+      (args.pid, language or "none"))
 print("%-8s %-16s %-8s %-30s" % ("TIME", "ID", "TYPE", "DESCRIPTION"))

 class ThreadEvent(ct.Structure):

--- a/tools/uthreads_example.txt
+++ b/tools/uthreads_example.txt
@@ -39,7 +39,7 @@ instead.
 USAGE message:

 # ./uthreads -h
-usage: uthreads.py [-h] [-l {java}] [-v] pid
+usage: uthreads.py [-h] [-l {java,none}] [-v] pid

 Trace thread creation/destruction events in high-level languages.

@@ -48,7 +48,7 @@ positional arguments:

 optional arguments:
  -h, --help            show this help message and exit
-  -l {java}, --language {java}
+  -l {java,none}, --language {java,none}
                        language to trace (none for pthreads only)
  -v, --verbose         verbose mode: print the BPF program (for debugging
                        purposes)