From 0865f94cb2c04645e4f126a32bed6afc4f25edc6 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Wed, 27 Apr 2016 16:57:58 +0200
Subject: [PATCH] usdt: Implement probe resolution [WIP]

---
 .clang-format              |   1 +
 src/cc/CMakeLists.txt      |   4 +-
 src/cc/usdt.cc             | 129 +++++++++++++++++++++++++++++++++++++
 src/cc/usdt.h              |  70 ++++++++++++++------
 src/cc/usdt_args.cc        |  53 ++++++++-------
 tests/cc/test_usdt_args.cc |   3 +-
 6 files changed, 212 insertions(+), 48 deletions(-)
 create mode 100644 src/cc/usdt.cc

diff --git a/.clang-format b/.clang-format
index 27ba3b3d..99a0b571 100644
--- a/.clang-format
+++ b/.clang-format
@@ -2,3 +2,4 @@
 BasedOnStyle: Google
 AllowShortIfStatementsOnASingleLine: false
 IndentCaseLabels: false
+AccessModifierOffset: -2
diff --git a/src/cc/CMakeLists.txt b/src/cc/CMakeLists.txt
index c1b11f04..93041ae2 100644
--- a/src/cc/CMakeLists.txt
+++ b/src/cc/CMakeLists.txt
@@ -33,12 +33,12 @@ if (CMAKE_COMPILER_IS_GNUCC)
   endif()
 endif()
 
-add_library(bcc-shared SHARED bpf_common.cc bpf_module.cc libbpf.c perf_reader.c shared_table.cc exported_files.cc bcc_elf.c bcc_proc.c bcc_syms.cc usdt_args.cc)
+add_library(bcc-shared SHARED bpf_common.cc bpf_module.cc libbpf.c perf_reader.c shared_table.cc exported_files.cc bcc_elf.c bcc_proc.c bcc_syms.cc usdt_args.cc usdt.cc)
 set_target_properties(bcc-shared PROPERTIES VERSION ${REVISION_LAST} SOVERSION 0)
 set_target_properties(bcc-shared PROPERTIES OUTPUT_NAME bcc)
 
 add_library(bcc-loader-static libbpf.c perf_reader.c bcc_elf.c bcc_proc.c)
-add_library(bcc-static STATIC bpf_common.cc bpf_module.cc shared_table.cc exported_files.cc bcc_syms.cc usdt_args.cc)
+add_library(bcc-static STATIC bpf_common.cc bpf_module.cc shared_table.cc exported_files.cc bcc_syms.cc usdt_args.cc usdt.cc)
 set_target_properties(bcc-static PROPERTIES OUTPUT_NAME bcc)
 
 # BPF is still experimental otherwise it should be available
diff --git a/src/cc/usdt.cc b/src/cc/usdt.cc
new file mode 100644
index 00000000..3cc3e888
--- /dev/null
+++ b/src/cc/usdt.cc
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2016 GitHub, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sstream>
+
+#include "bcc_elf.h"
+#include "usdt.h"
+#include "vendor/tinyformat.hpp"
+
+namespace USDT {
+
+Probe::Location::Location(uint64_t addr, const char *arg_fmt) : address_(addr) {
+  // Each parse() call targets a fresh Argument, kept only if parse() succeeds.
+  for (ArgumentParser_x64 parser(arg_fmt); !parser.done();) {
+    Argument *parsed = new Argument();
+    if (parser.parse(parsed)) {
+      arguments_.push_back(parsed);
+    } else {
+      delete parsed;  // TODO: report error
+    }
+  }
+}
+
+// Captures the probe's identity; locations arrive later via add_location().
+Probe::Probe(const char *bin_path, const char *provider, const char *name,
+             uint64_t semaphore)
+    : bin_path_(bin_path), provider_(provider), name_(name),
+      semaphore_(semaphore) {
+}
+
+// Builds (once) one `<prefix>_thunk_<N>` function per location of this probe.
+// NOTE(review): a cached result is reused even if `prefix` differs.
+const std::string &Probe::usdt_thunks(const std::string &prefix) {
+  if (gen_thunks_.empty()) {
+    std::ostringstream out;
+    for (size_t loc_id = 0; loc_id < locations_.size(); ++loc_id) {
+      tfm::format(
+          out,
+          "int %s_thunk_%d(struct pt_regs *ctx) { return %s(ctx, %d); }\n",
+          prefix, loc_id, prefix, loc_id);
+    }
+    gen_thunks_ = out.str();
+  }
+  return gen_thunks_;
+}
+
+// Generates (once) the C text that declares arg1..argN and assigns them per
+// location, keyed on `__loc_id`; N is the largest per-location argument count.
+const std::string &Probe::usdt_cases(const optional<int> &pid) {
+  if (!gen_cases_.empty())
+    return gen_cases_;
+  std::ostringstream stream;
+  size_t arg_count = 0;  // stays 0 when no location has been added
+  for (const Location &location : locations_)
+    if (location.arguments_.size() > arg_count)
+      arg_count = location.arguments_.size();
+  for (size_t arg_n = 0; arg_n < arg_count; ++arg_n) {
+    // Widest size at any location; non-null since arg_count is a maximum.
+    const Argument *largest = nullptr;
+    for (const Location &location : locations_) {
+      if (arg_n >= location.arguments_.size())
+        continue;  // this location has fewer arguments
+      const Argument *candidate = location.arguments_[arg_n];
+      if (!largest || candidate->arg_size() > largest->arg_size())
+        largest = candidate;
+    }
+    tfm::format(stream, "        %s arg%d = 0;\n", largest->ctype(), arg_n + 1);
+  }
+  for (size_t loc_n = 0; loc_n < locations_.size(); ++loc_n) {
+    const Location &location = locations_[loc_n];
+    tfm::format(stream, "if (__loc_id == %d) {\n", loc_n);
+    for (size_t arg_n = 0; arg_n < location.arguments_.size(); ++arg_n)
+      location.arguments_[arg_n]->assign_to_local(
+          stream, tfm::format("arg%d", arg_n + 1), bin_path_, pid);
+    stream << "}\n";
+  }
+  gen_cases_ = stream.str();
+  return gen_cases_;
+}
+
+void Probe::add_location(uint64_t addr, const char *fmt) {
+  locations_.emplace_back(addr, fmt);  // builds a Location(addr, fmt) in place
+}
+
+// Static callback given to bcc_elf_foreach_usdt; `p` carries the Context.
+void Context::_each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
+                          void *p) {
+  static_cast<Context *>(p)->add_probe(binpath, probe);
+}
+
+// Attaches the location to the probe with the same provider and name, creating
+// that probe first when no such probe has been recorded yet.
+void Context::add_probe(const char *binpath, const struct bcc_elf_usdt *probe) {
+  for (Probe *known : probes_) {
+    const bool same_probe =
+        known->provider_ == probe->provider && known->name_ == probe->name;
+    if (same_probe) {
+      known->add_location(probe->pc, probe->arg_fmt);
+      return;
+    }
+  }
+
+  // NOTE(review): probes_ keeps the raw pointer; no delete is visible here.
+  Probe *created =
+      new Probe(binpath, probe->provider, probe->name, probe->semaphore);
+  probes_.push_back(created);
+  created->add_location(probe->pc, probe->arg_fmt);
+}
+
+void Context::add_probes(const std::string &bin_path) {
+  bcc_elf_foreach_usdt(bin_path.c_str(), _each_probe, this);  // payload = this
+}
+
+Context::Context(const std::string &bin_path) { add_probes(bin_path); }
+
+Context::Context(int pid) {}  // pid is ignored; probes_ stays empty
+}
diff --git a/src/cc/usdt.h b/src/cc/usdt.h
index 3f07450e..01f47de7 100644
--- a/src/cc/usdt.h
+++ b/src/cc/usdt.h
@@ -17,6 +17,8 @@
 
 #include <string>
 #include <unordered_map>
+#include <vector>
+
 #include "vendor/optional.hpp"
 
 namespace USDT {
@@ -26,22 +28,24 @@ using std::experimental::nullopt;
 class ArgumentParser;
 
 class Argument {
- private:
+private:
   optional<int> arg_size_;
   optional<int> constant_;
   optional<int> deref_offset_;
   optional<std::string> deref_ident_;
   optional<std::string> register_name_;
 
-  uint64_t get_global_address(int pid) const;
+  uint64_t get_global_address(const std::string &binpath,
+                              const optional<int> &pid) const;
   static const std::unordered_map<std::string, std::string> translations_;
 
- public:
+public:
   Argument();
   ~Argument();
 
-  void assign_to_local(std::ostream &stream,
-    const std::string &local_name, optional<int> pid = nullopt) const;
+  void assign_to_local(std::ostream &stream, const std::string &local_name,
+                       const std::string &binpath,
+                       const optional<int> &pid = nullopt) const;
 
   int arg_size() const { return arg_size_.value_or(sizeof(void *)); }
   std::string ctype() const;
@@ -59,7 +63,7 @@ class ArgumentParser {
   const char *arg_;
   ssize_t cur_pos_;
 
- protected:
+protected:
   virtual bool validate_register(const std::string &reg, int *reg_size) = 0;
 
   ssize_t parse_number(ssize_t pos, optional<int> *number);
@@ -70,7 +74,7 @@ class ArgumentParser {
 
   void print_error(ssize_t pos);
 
- public:
+public:
   bool parse(Argument *dest);
   bool done() { return arg_[cur_pos_] == '\0'; }
 
@@ -81,21 +85,51 @@ class ArgumentParser_x64 : public ArgumentParser {
   static const std::unordered_map<std::string, int> registers_;
   bool validate_register(const std::string &reg, int *reg_size);
 
- public:
+public:
   ArgumentParser_x64(const char *arg) : ArgumentParser(arg) {}
 };
 
-struct Probe {
-  std::string _bin_path;
-  std::string _provider;
-  std::string _name;
-  uint64_t _semaphore;
+class Probe {
+  std::string bin_path_;
+  std::string provider_;
+  std::string name_;
+  uint64_t semaphore_;
+
+  struct Location {
+    uint64_t address_;
+    std::vector<Argument *> arguments_;
+    Location(uint64_t addr, const char *arg_fmt);
+  };
+
+  std::vector<Location> locations_;
 
+  std::string gen_thunks_;
+  std::string gen_cases_;
+
+public:
   Probe(const char *bin_path, const char *provider, const char *name,
-        uint64_t semaphore)
-      : _bin_path(bin_path),
-        _provider(provider),
-        _name(name),
-        _semaphore(semaphore) {}
+        uint64_t semaphore);
+
+  void add_location(uint64_t addr, const char *fmt);
+  bool need_enable() const { return semaphore_ != 0x0; }
+  size_t location_count() const { return locations_.size(); }
+
+  const std::string &usdt_thunks(const std::string &prefix);
+  const std::string &usdt_cases(const optional<int> &pid);
+
+  friend class Context;
+};
+
+class Context {
+  std::vector<Probe *> probes_;
+
+  static void _each_probe(const char *binpath, const struct bcc_elf_usdt *probe,
+                          void *p);
+  void add_probe(const char *binpath, const struct bcc_elf_usdt *probe);
+  void add_probes(const std::string &bin_path);
+
+public:
+  Context(const std::string &bin_path);
+  Context(int pid);
 };
 }
diff --git a/src/cc/usdt_args.cc b/src/cc/usdt_args.cc
index ed21bb7b..dfd929c7 100644
--- a/src/cc/usdt_args.cc
+++ b/src/cc/usdt_args.cc
@@ -13,8 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include "usdt.h"
 #include <unordered_map>
+
+#include "usdt.h"
 #include "vendor/tinyformat.hpp"
 
 namespace USDT {
@@ -23,20 +24,16 @@ Argument::Argument() {}
 Argument::~Argument() {}
 
 const std::unordered_map<std::string, std::string> Argument::translations_ = {
-  {"rax", "ax"}, {"rbx", "bx"}, {"rcx", "cx"}, {"rdx", "dx"},
-  {"rdi", "di"}, {"rsi", "si"}, {"rbp", "bp"}, {"rsp", "sp"},
-  {"rip", "ip"}, {"eax", "ax"}, {"ebx", "bx"}, {"ecx", "cx"},
-  {"edx", "dx"}, {"edi", "di"}, {"esi", "si"}, {"ebp", "bp"},
-  {"esp", "sp"}, {"eip", "ip"},
+    {"rax", "ax"}, {"rbx", "bx"}, {"rcx", "cx"}, {"rdx", "dx"}, {"rdi", "di"},
+    {"rsi", "si"}, {"rbp", "bp"}, {"rsp", "sp"}, {"rip", "ip"}, {"eax", "ax"},
+    {"ebx", "bx"}, {"ecx", "cx"}, {"edx", "dx"}, {"edi", "di"}, {"esi", "si"},
+    {"ebp", "bp"}, {"esp", "sp"}, {"eip", "ip"},
 
-  {"al", "ax"}, {"bl", "bx"}, {"cl", "cx"}, {"dl", "dx"}
-};
+    {"al", "ax"},  {"bl", "bx"},  {"cl", "cx"},  {"dl", "dx"}};
 
 std::string Argument::ctype() const {
   const int s = arg_size() * 8;
-  return (s < 0) ?
-    tfm::format("int%d_t", -s) :
-    tfm::format("uint%d_t", s);
+  return (s < 0) ? tfm::format("int%d_t", -s) : tfm::format("uint%d_t", s);
 }
 
 void Argument::normalize_register_name(std::string *normalized) const {
@@ -52,43 +49,45 @@ void Argument::normalize_register_name(std::string *normalized) const {
     normalized->assign(it->second);
 }
 
-uint64_t Argument::get_global_address(int pid) const {
+uint64_t Argument::get_global_address(const std::string &binpath,
+                                      const optional<int> &pid) const {
   return 0x0;
 }
 
 void Argument::assign_to_local(std::ostream &stream,
-    const std::string &local_name, optional<int> pid) const {
-
+                               const std::string &local_name,
+                               const std::string &binpath,
+                               const optional<int> &pid) const {
   std::string regname;
   normalize_register_name(&regname);
 
   if (constant_) {
-    tfm::format(stream, "%s = %d;", local_name, *constant_);
+    tfm::format(stream, "%s = %d;\n", local_name, *constant_);
     return;
   }
 
   if (!deref_offset_) {
-    tfm::format(stream, "%s = (%s)ctx->%s;", local_name, ctype(), regname);
+    tfm::format(stream, "%s = (%s)ctx->%s;\n", local_name, ctype(), regname);
     return;
   }
 
   if (deref_offset_ && !deref_ident_) {
     tfm::format(stream,
-      "{\n"
-      "    u64 __temp = ctx->%s + (%d);\n"
-      "    bpf_probe_read(&%s, sizeof(%s), (void *)__temp);\n"
-      "}\n",
-      regname, *deref_offset_, local_name, local_name);
+                "{\n"
+                "    u64 __temp = ctx->%s + (%d);\n"
+                "    bpf_probe_read(&%s, sizeof(%s), (void *)__temp);\n"
+                "}\n",
+                regname, *deref_offset_, local_name, local_name);
     return;
   }
 
   tfm::format(stream,
-    "{\n"
-    "    u64 __temp = 0x%xull + %d;\n"
-    "    bpf_probe_read(&%s, sizeof(%s), (void *)__temp);\n"
-    "}\n",
-    get_global_address(pid.value_or(-1)),
-    *deref_offset_, local_name, local_name);
+              "{\n"
+              "    u64 __temp = 0x%xull + %d;\n"
+              "    bpf_probe_read(&%s, sizeof(%s), (void *)__temp);\n"
+              "}\n",
+              get_global_address(binpath, pid), *deref_offset_, local_name,
+              local_name);
 }
 
 ssize_t ArgumentParser::parse_number(ssize_t pos, optional<int> *result) {
diff --git a/tests/cc/test_usdt_args.cc b/tests/cc/test_usdt_args.cc
index 9ab32f0d..946e5196 100644
--- a/tests/cc/test_usdt_args.cc
+++ b/tests/cc/test_usdt_args.cc
@@ -13,8 +13,9 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-#include <string>
 #include <iostream>
+#include <string>
+
 #include "catch.hpp"
 #include "usdt.h"
 
-- 
2.30.9