Commit 7cf96a44 authored by 4ast

Merge pull request #467 from goldshtn/usdt-v2

Support for global variable arguments in USDT probes
parents a4803040 5a1d2e35
...@@ -26,7 +26,7 @@ import sys ...@@ -26,7 +26,7 @@ import sys
basestring = (unicode if sys.version_info[0] < 3 else str) basestring = (unicode if sys.version_info[0] < 3 else str)
from .libbcc import lib, _CB_TYPE from .libbcc import lib, _CB_TYPE
from .procstat import ProcStat from .procstat import ProcStat, ProcUtils
from .table import Table from .table import Table
from .tracepoint import Perf, Tracepoint from .tracepoint import Perf, Tracepoint
from .usyms import ProcessSymbols from .usyms import ProcessSymbols
......
...@@ -31,3 +31,95 @@ class ProcStat(object): ...@@ -31,3 +31,95 @@ class ProcStat(object):
return os.popen("cut -d' ' -f 22 /proc/%d/stat" % return os.popen("cut -d' ' -f 22 /proc/%d/stat" %
self.pid).read() self.pid).read()
class ProcUtils(object):
    """
    Static helpers for inspecting processes and binaries: load addresses
    and loaded modules (read from /proc/$PID/maps), binary type detection,
    symlink resolution, and PATH lookup.
    """

    @staticmethod
    def get_load_address(pid, bin_path):
        """
        get_load_address(pid, bin_path)

        Returns the address at which the specified module is loaded
        in the specified process, taken from the first line of
        /proc/$PID/maps that mentions bin_path. The module path must
        be the file system path as it appears in the maps file, not a
        symbolic link. Raises ValueError if no mapping mentions
        bin_path.
        """
        with open("/proc/%d/maps" % pid) as maps:
            for line in maps:
                if bin_path in line:
                    # Lines start with "<start>-<end> ..."; the start
                    # of the first matching mapping is the load address.
                    return int(line.split('-')[0], 16)
        raise ValueError("lib %s not loaded in pid %d"
                         % (bin_path, pid))

    @staticmethod
    def get_modules(pid):
        """
        get_modules(pid)

        Returns a list of all the modules loaded into the specified
        process. Modules are enumerated by looking at /proc/$PID/maps
        and returning the module name for regions that contain
        executable code. Pseudo-regions whose name starts with '['
        and anonymous regions (no name column) are skipped.
        """
        modules = []
        with open("/proc/%d/maps" % pid) as maps:
            for line in maps:
                parts = line.split()
                if len(parts) < 6:
                    # No path column: anonymous mapping.
                    continue
                if parts[5][0] == '[' or 'x' not in parts[1]:
                    continue
                modules.append(parts[5])
        return modules

    @staticmethod
    def is_shared_object(bin_path):
        """
        is_shared_object(bin_path)

        Returns whether the specified binary is a shared object, rather
        than an executable, based on the MIME type reported by the
        "file" utility. If it is neither (or "file" cannot be run), a
        ValueError is raised.
        """
        # Local import so the block does not depend on a file-level
        # import of subprocess.
        import subprocess

        # Pass the path as an argument vector instead of a shell
        # string, so spaces and shell metacharacters in bin_path are
        # harmless.
        try:
            proc = subprocess.Popen(
                ["file", "--mime-type", "-b", "--", bin_path],
                stdout=subprocess.PIPE)
            output = proc.communicate()[0]
        except OSError as e:
            raise ValueError("unable to run 'file' on binary %s: %s" %
                             (bin_path, e))
        mime_type = output.decode("utf-8", "replace").strip()
        if mime_type == "application/x-sharedlib":
            return True
        if mime_type == "application/x-executable":
            return False
        raise ValueError("invalid mime type %s for binary %s" %
                         (mime_type, bin_path))

    @staticmethod
    def traverse_symlink(path):
        """Returns the actual path behind the specified symlink."""
        # os.path.realpath resolves every symlink in the path without
        # spawning a shell, so unusual characters in path are safe.
        return os.path.realpath(path)

    @staticmethod
    def which(bin_path):
        """
        which(bin_path)

        Traverses the PATH environment variable, looking for the first
        directory that contains an executable file named bin_path, and
        returns the full path to that file, or None if no such file
        can be found. If bin_path contains a directory component, only
        that exact path is checked. This is meant to replace invocations
        of the "which" shell utility, which doesn't have portable
        semantics for skipping aliases.
        """
        # Source: http://stackoverflow.com/a/377028
        def is_exe(fpath):
            return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

        dirname, _ = os.path.split(bin_path)
        if dirname:
            if is_exe(bin_path):
                return bin_path
            return None

        # Fall back to the platform default search path when PATH is
        # unset instead of failing with KeyError.
        search_path = os.environ.get("PATH", os.defpath)
        for directory in search_path.split(os.pathsep):
            candidate = os.path.join(directory.strip('"'), bin_path)
            if is_exe(candidate):
                return candidate
        return None
...@@ -17,13 +17,15 @@ import struct ...@@ -17,13 +17,15 @@ import struct
import re import re
from . import BPF from . import BPF
from . import ProcStat from . import ProcStat, ProcUtils
class USDTArgument(object): class USDTArgument(object):
def __init__(self, size, is_signed, register=None, constant=None, def __init__(self, size, is_signed, location,
deref_offset=None, deref_name=None): register=None, constant=None, deref_offset=None,
deref_name=None):
self.size = size self.size = size
self.is_signed = is_signed self.is_signed = is_signed
self.location = location
self.register = register self.register = register
self.constant = constant self.constant = constant
self.deref_offset = deref_offset self.deref_offset = deref_offset
...@@ -48,9 +50,9 @@ class USDTArgument(object): ...@@ -48,9 +50,9 @@ class USDTArgument(object):
"cl": "cx", "dl": "dx" "cl": "cx", "dl": "dx"
} }
def generate_assign_to_local(self, local_name): def generate_assign_to_local(self, local_name, pid=None):
""" """
generate_assign_to_local(local_name) generate_assign_to_local(local_name, pid=None)
Generates an assignment statement that initializes a local Generates an assignment statement that initializes a local
variable with the value of this argument. Assumes that the variable with the value of this argument. Assumes that the
...@@ -59,6 +61,13 @@ class USDTArgument(object): ...@@ -59,6 +61,13 @@ class USDTArgument(object):
by the caller. Use get_type() to get the proper type for that by the caller. Use get_type() to get the proper type for that
declaration. declaration.
The pid parameter is intended for use when the argument depends
on an address that is process-specific. This only happens for
arguments that are offsets from globals -- the load address for
the global depends on the process. If no pid is specified and
the argument depends on an address that is process-specific,
an error is raised.
Example output: Example output:
local1 = (u64)ctx->di; local1 = (u64)ctx->di;
{ {
...@@ -93,10 +102,41 @@ class USDTArgument(object): ...@@ -93,10 +102,41 @@ class USDTArgument(object):
# Final case: dereference global, need to find address of global # Final case: dereference global, need to find address of global
# with the provided name and then potentially add deref_offset # with the provided name and then potentially add deref_offset
# and bpf_probe_read the result. None of this will work with BPF # and bpf_probe_read the result.
# because we can't just access arbitrary addresses. return \
return "%s = 0; /* UNSUPPORTED CASE, SEE SOURCE */" % \ """{
local_name u64 __temp = 0x%x + %d;
bpf_probe_read(&%s, sizeof(%s), (void *)__temp);
} """ % (self._get_global_address(pid), self.deref_offset,
local_name, local_name)
def _get_global_address(self, pid=None):
    """
    Resolves the address of the global symbol this argument refers to.

    The symbol's offset is looked up in the binary of the probe that
    owns this argument's location. For an executable that value is
    returned as-is, because it does not depend on the process. For a
    shared object the library's load address in process `pid` is added,
    and ValueError is raised when no pid was supplied.
    """
    bin_path = self.location.probe.bin_path
    offset = self._get_global_offset(bin_path)

    # Executable: the global symbol's address doesn't depend on the pid.
    if not ProcUtils.is_shared_object(bin_path):
        return offset

    # Library: the symbol offset is relative to wherever the library
    # was loaded in the target process, so a pid is mandatory.
    if pid is None:
        message = "pid is required for " + "argument '%s'" % str(self)
        raise ValueError(message)
    return ProcUtils.get_load_address(pid, bin_path) + offset
def _get_global_offset(self, bin_path):
    """
    Returns the value (as an int) that "objdump -tT" reports for the
    symbol named self.deref_name in the binary at bin_path.

    Both the regular and the dynamic symbol tables are searched and
    the first entry whose name matches exactly wins. Raises ValueError
    if the symbol is not found, including when objdump cannot be run
    or produces no output.
    """
    # Local import so the block does not depend on a file-level
    # import of subprocess.
    import subprocess

    # Run objdump directly rather than through a shell pipeline:
    # bin_path and deref_name are then never interpreted by the shell
    # or as a grep regular expression.
    try:
        proc = subprocess.Popen(["objdump", "-tT", bin_path],
                                stdout=subprocess.PIPE)
        output = proc.communicate()[0]
    except OSError:
        output = b""

    for line in output.decode("utf-8", "replace").splitlines():
        parts = line.split()
        # The symbol name is always the last column. The number of
        # columns before it varies (flag tokens, version column in the
        # dynamic table), so a fixed index is not reliable.
        if len(parts) < 2 or parts[-1] != self.deref_name:
            continue
        try:
            return int(parts[0], 16)
        except ValueError:
            # Not a symbol row (first column is not a hex address).
            continue
    raise ValueError("can't find global symbol %s" %
                     self.deref_name)
def get_type(self): def get_type(self):
result_type = None result_type = None
...@@ -132,18 +172,19 @@ class USDTArgument(object): ...@@ -132,18 +172,19 @@ class USDTArgument(object):
self.deref_name) self.deref_name)
class USDTProbeLocation(object): class USDTProbeLocation(object):
def __init__(self, address, args): def __init__(self, address, args, probe):
self.address = address self.address = address
self.raw_args = args self.raw_args = args
self.probe = probe
self.args = [] self.args = []
self._parse_args() self._parse_args()
def generate_usdt_assignments(self, prefix="arg"): def generate_usdt_assignments(self, prefix="arg", pid=None):
text = "" text = ""
for i, arg in enumerate(self.args, 1): for i, arg in enumerate(self.args, 1):
text += (" "*16) + \ text += (" "*16) + \
arg.generate_assign_to_local( arg.generate_assign_to_local(
"%s%d" % (prefix, i)) + "\n" "%s%d" % (prefix, i), pid) + "\n"
return text return text
def _parse_args(self): def _parse_args(self):
...@@ -168,6 +209,7 @@ class USDTProbeLocation(object): ...@@ -168,6 +209,7 @@ class USDTProbeLocation(object):
self.args.append(USDTArgument( self.args.append(USDTArgument(
int(m.group(2)), int(m.group(2)),
m.group(1) == '-', m.group(1) == '-',
self,
constant=int(m.group(3)) constant=int(m.group(3))
)) ))
return return
...@@ -184,7 +226,7 @@ class USDTProbeLocation(object): ...@@ -184,7 +226,7 @@ class USDTProbeLocation(object):
elif arg in bregs: elif arg in bregs:
size = 1 size = 1
self.args.append(USDTArgument( self.args.append(USDTArgument(
size, False, register=arg size, False, self, register=arg
)) ))
return return
...@@ -194,6 +236,7 @@ class USDTProbeLocation(object): ...@@ -194,6 +236,7 @@ class USDTProbeLocation(object):
self.args.append(USDTArgument( self.args.append(USDTArgument(
int(m.group(2)), # Size (in bytes) int(m.group(2)), # Size (in bytes)
m.group(1) == '-', # Signed m.group(1) == '-', # Signed
self,
register=m.group(3) register=m.group(3)
)) ))
return return
...@@ -201,11 +244,12 @@ class USDTProbeLocation(object): ...@@ -201,11 +244,12 @@ class USDTProbeLocation(object):
# 8@-8(%rbp), 4@(%rax) # 8@-8(%rbp), 4@(%rax)
m = re.match(r'(\-?)(\d+)@(\-?)(\d*)\(' + any_reg + r'\)', arg) m = re.match(r'(\-?)(\d+)@(\-?)(\d*)\(' + any_reg + r'\)', arg)
if m is not None: if m is not None:
deref_offset = int(m.group(4)) deref_offset = int(m.group(4)) if len(m.group(4)) > 0 \
else 0
if m.group(3) == '-': if m.group(3) == '-':
deref_offset = -deref_offset deref_offset = -deref_offset
self.args.append(USDTArgument( self.args.append(USDTArgument(
int(m.group(2)), m.group(1) == '-', int(m.group(2)), m.group(1) == '-', self,
register=m.group(5), deref_offset=deref_offset register=m.group(5), deref_offset=deref_offset
)) ))
return return
...@@ -214,7 +258,7 @@ class USDTProbeLocation(object): ...@@ -214,7 +258,7 @@ class USDTProbeLocation(object):
m = re.match(r'(\-?)(\d+)@(\w+)\(%rip\)', arg) m = re.match(r'(\-?)(\d+)@(\w+)\(%rip\)', arg)
if m is not None: if m is not None:
self.args.append(USDTArgument( self.args.append(USDTArgument(
int(m.group(2)), m.group(1) == '-', int(m.group(2)), m.group(1) == '-', self,
register="%rip", deref_name=m.group(3), register="%rip", deref_name=m.group(3),
deref_offset=0 deref_offset=0
)) ))
...@@ -227,7 +271,7 @@ class USDTProbeLocation(object): ...@@ -227,7 +271,7 @@ class USDTProbeLocation(object):
if m.group(3) == '-': if m.group(3) == '-':
deref_offset = -deref_offset deref_offset = -deref_offset
self.args.append(USDTArgument( self.args.append(USDTArgument(
int(m.group(2)), m.group(1) == '-', int(m.group(2)), m.group(1) == '-', self,
register="%rip", deref_offset=deref_offset, register="%rip", deref_offset=deref_offset,
deref_name=m.group(5) deref_name=m.group(5)
)) ))
...@@ -247,7 +291,8 @@ class USDTProbe(object): ...@@ -247,7 +291,8 @@ class USDTProbe(object):
self.locations = [] self.locations = []
def add_location(self, location, arguments): def add_location(self, location, arguments):
self.locations.append(USDTProbeLocation(location, arguments)) self.locations.append(USDTProbeLocation(
location, arguments, self))
def need_enable(self): def need_enable(self):
""" """
...@@ -299,12 +344,13 @@ int %s(struct pt_regs *ctx) { ...@@ -299,12 +344,13 @@ int %s(struct pt_regs *ctx) {
} """ % (thunk_name, name_prefix, i) } """ % (thunk_name, name_prefix, i)
return text return text
def generate_usdt_cases(self): def generate_usdt_cases(self, pid=None):
text = "" text = ""
for i, arg_type in enumerate(self.get_arg_types(), 1): for i, arg_type in enumerate(self.get_arg_types(), 1):
text += " %s arg%d = 0;\n" % (arg_type, i) text += " %s arg%d = 0;\n" % (arg_type, i)
for i, location in enumerate(self.locations): for i, location in enumerate(self.locations):
assignments = location.generate_usdt_assignments() assignments = location.generate_usdt_assignments(
pid=pid)
text += \ text += \
""" """
if (__loc_id == %d) { if (__loc_id == %d) {
...@@ -316,18 +362,11 @@ int %s(struct pt_regs *ctx) { ...@@ -316,18 +362,11 @@ int %s(struct pt_regs *ctx) {
if pid in self.proc_semas: if pid in self.proc_semas:
return self.proc_semas[pid] return self.proc_semas[pid]
if self.bin_path.endswith(".so"): if ProcUtils.is_shared_object(self.bin_path):
# Semaphores declared in shared objects are relative # Semaphores declared in shared objects are relative
# to that shared object's load address # to that shared object's load address
with open("/proc/%d/maps" % pid) as m: sema_addr = ProcUtils.get_load_address(
maps = m.readlines() pid, self.bin_path) + self.semaphore
addrs = map(lambda l: l.split('-')[0],
filter(lambda l: self.bin_path in l, maps)
)
if len(addrs) == 0:
raise ValueError("lib %s not loaded in pid %d"
% (self.bin_path, pid))
sema_addr = int(addrs[0], 16) + self.semaphore
else: else:
sema_addr = self.semaphore # executable, absolute sema_addr = self.semaphore # executable, absolute
self.proc_semas[pid] = sema_addr self.proc_semas[pid] = sema_addr
...@@ -365,32 +404,16 @@ class USDTReader(object): ...@@ -365,32 +404,16 @@ class USDTReader(object):
""" """
self.probes = [] self.probes = []
if pid != -1: if pid != -1:
for mod in USDTReader._get_modules(pid): for mod in ProcUtils.get_modules(pid):
self._add_probes(mod) self._add_probes(mod)
elif len(bin_path) != 0: elif len(bin_path) != 0:
self._add_probes(bin_path) self._add_probes(bin_path)
else: else:
raise ValueError("pid or bin_path is required") raise ValueError("pid or bin_path is required")
@staticmethod
def _get_modules(pid):
with open("/proc/%d/maps" % pid) as f:
maps = f.readlines()
modules = []
for line in maps:
parts = line.strip().split()
if len(parts) < 6:
continue
if parts[5][0] == '[' or not 'x' in parts[1]:
continue
modules.append(parts[5])
return modules
def _add_probes(self, bin_path): def _add_probes(self, bin_path):
if not os.path.isfile(bin_path): if not os.path.isfile(bin_path):
attempt1 = os.popen( attempt1 = ProcUtils.which(bin_path)
"which --skip-alias %s 2>/dev/null"
% bin_path).read().strip()
if attempt1 is None or not os.path.isfile(attempt1): if attempt1 is None or not os.path.isfile(attempt1):
attempt2 = BPF.find_library(bin_path) attempt2 = BPF.find_library(bin_path)
if attempt2 is None or \ if attempt2 is None or \
...@@ -401,6 +424,7 @@ class USDTReader(object): ...@@ -401,6 +424,7 @@ class USDTReader(object):
bin_path = attempt2 bin_path = attempt2
else: else:
bin_path = attempt1 bin_path = attempt1
bin_path = ProcUtils.traverse_symlink(bin_path)
with os.popen("readelf -n %s 2>/dev/null" % bin_path) as child: with os.popen("readelf -n %s 2>/dev/null" % bin_path) as child:
notes = child.read() notes = child.read()
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# Licensed under the Apache License, Version 2.0 (the "License") # Licensed under the Apache License, Version 2.0 (the "License")
# Copyright (C) 2016 Sasha Goldshtein. # Copyright (C) 2016 Sasha Goldshtein.
from bcc import BPF, Tracepoint, Perf, USDTReader from bcc import BPF, Tracepoint, Perf, ProcUtils, USDTReader
from time import sleep, strftime from time import sleep, strftime
import argparse import argparse
import re import re
...@@ -392,12 +392,9 @@ QUALIFIER int PROBENAME(struct pt_regs *ctx SIGNATURE) ...@@ -392,12 +392,9 @@ QUALIFIER int PROBENAME(struct pt_regs *ctx SIGNATURE)
def _attach_u(self): def _attach_u(self):
libpath = BPF.find_library(self.library) libpath = BPF.find_library(self.library)
if libpath is None: if libpath is None:
with os.popen(("which --skip-alias %s " + libpath = ProcUtils.which(self.library)
"2>/dev/null") % self.library) as w:
libpath = w.read().strip()
if libpath is None or len(libpath) == 0: if libpath is None or len(libpath) == 0:
self._bail("unable to find library %s" % self._bail("unable to find library %s" % self.library)
self.library)
if self.probe_type == "u": if self.probe_type == "u":
for i, location in enumerate(self.usdt.locations): for i, location in enumerate(self.usdt.locations):
......
...@@ -331,7 +331,8 @@ BPF_PERF_OUTPUT(%s); ...@@ -331,7 +331,8 @@ BPF_PERF_OUTPUT(%s);
prefix = self.tp.generate_get_struct() prefix = self.tp.generate_get_struct()
elif self.probe_type == "u": elif self.probe_type == "u":
signature += ", int __loc_id" signature += ", int __loc_id"
prefix = self.usdt.generate_usdt_cases() prefix = self.usdt.generate_usdt_cases(
pid=Probe.pid if Probe.pid != -1 else None)
qualifier = "static inline" qualifier = "static inline"
data_fields = "" data_fields = ""
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment