Commit 5ec4cc47 authored by yonghong-song, committed by GitHub

Merge pull request #1582 from iovisor/python_bytes

Discourage use of str() type strings in python API
parents 82970a67 42d6098f
This diff is collapsed.
......@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import ctypes as ct
import sys
import traceback
import warnings
from .libbcc import lib
......@@ -39,3 +42,57 @@ def detect_language(candidates, pid):
res = lib.bcc_procutils_language(pid)
language = ct.cast(res, ct.c_char_p).value.decode()
return language if language in candidates else None
FILESYSTEMENCODING = sys.getfilesystemencoding()
def printb(s, file=None):
    """
    printb(s, file=None)

    write the bytes object `s` followed by a newline to `file`, then flush.

    `file` defaults to whatever sys.stdout is at call time (not at import
    time), so redirecting sys.stdout after this module is loaded is honoured.
    If `file` exposes a binary `.buffer` (as text streams do on python3) the
    bytes are written there untouched; otherwise they are written to `file`
    itself (python2 streams, or streams opened in binary mode).
    """
    if file is None:
        # Resolve lazily: a default of sys.stdout in the signature would be
        # frozen to the stream that existed when the module was imported.
        file = sys.stdout
    buf = file.buffer if hasattr(file, "buffer") else file
    buf.write(s)
    buf.write(b"\n")
    # Flush through the outer object so any text-layer state is pushed too.
    file.flush()
class ArgString(object):
    """
    ArgString(arg)

    encapsulate a system argument that can be easily coerced to a bytes()
    object, which is better for comparing to kernel or probe data (which should
    never be en/decode()'ed).

    The argument is kept as text in `self.s`; bytes(obj) / obj.__bytes__()
    encodes it with FILESYSTEMENCODING. str(obj) returns the native string
    type of the running interpreter: bytes on python2, text on python3.
    """

    def __init__(self, arg):
        # bytes input (every plain str on python2, explicit bytes on python3)
        # is decoded once here so that __bytes__ can always call .encode().
        if isinstance(arg, bytes):
            self.s = arg.decode(FILESYSTEMENCODING)
        else:
            self.s = arg

    def __bytes__(self):
        return self.s.encode(FILESYSTEMENCODING)

    def __str__(self):
        # __str__ must return the interpreter's native str type. Returning
        # bytes on python3 makes str(obj) and "%s" % obj raise TypeError.
        if str is bytes:
            return self.__bytes__()
        return self.s
def warn_with_traceback(message, category, filename, lineno, file=None, line=None):
    """
    warn_with_traceback(message, category, filename, lineno, file=None, line=None)

    drop-in replacement for warnings.showwarning: print the call stack,
    starting two frames above this function, and then the formatted warning.
    Output goes to `file` when it has a write() method, else to sys.stderr.
    """
    sink = sys.stderr
    if hasattr(file, "write"):
        sink = file
    caller_frame = sys._getframe(2)
    traceback.print_stack(f=caller_frame, file=sink)
    formatted = warnings.formatwarning(message, category, filename, lineno, line)
    sink.write(formatted)
# uncomment to get full tracebacks for invalid uses of python3+str in arguments
#warnings.showwarning = warn_with_traceback
_strict_bytes = False
def _assert_is_bytes(arg):
if arg is None:
return arg
if _strict_bytes:
assert type(arg) is bytes, "not a bytes object: %r" % arg
elif type(arg) is not bytes:
warnings.warn("not a bytes object: %r" % arg, DeprecationWarning, 2)
return ArgString(arg).__bytes__()
return arg
......@@ -15,14 +15,14 @@ class TestKSyms(TestCase):
# Grab the first symbol in kallsyms that has type 't' or 'T'.
# Also, find all aliases of this symbol which are identifiable
# by the same address.
with open("/proc/kallsyms") as f:
with open("/proc/kallsyms", "rb") as f:
for line in f:
# Extract the first 3 columns only. The 4th column
# containing the module name may not exist for all
# symbols.
(addr, t, name) = line.strip().split()[:3]
if t == "t" or t == "T":
if t == b"t" or t == b"T":
if not address:
address = addr
if addr == address:
......@@ -32,7 +32,7 @@ class TestKSyms(TestCase):
return (address, aliases)
def test_ksymname(self):
sym = BPF.ksymname("__kmalloc")
sym = BPF.ksymname(b"__kmalloc")
self.assertIsNotNone(sym)
self.assertNotEqual(sym, 0)
......@@ -58,21 +58,23 @@ class Harness(TestCase):
def tearDown(self):
self.process.kill()
self.process.wait()
self.process.stdout.close()
self.process = None
def resolve_addr(self):
sym, offset, module = self.syms.resolve(self.addr, False)
self.assertEqual(sym, self.mangled_name)
self.assertEqual(offset, 0)
self.assertTrue(module[-5:] == 'dummy')
self.assertTrue(module[-5:] == b'dummy')
sym, offset, module = self.syms.resolve(self.addr, True)
self.assertEqual(sym, 'some_namespace::some_function(int, int)')
self.assertEqual(sym, b'some_namespace::some_function(int, int)')
self.assertEqual(offset, 0)
self.assertTrue(module[-5:] == 'dummy')
self.assertTrue(module[-5:] == b'dummy')
def resolve_name(self):
script_dir = os.path.dirname(os.path.realpath(__file__))
addr = self.syms.resolve_name(os.path.join(script_dir, 'dummy'),
script_dir = os.path.dirname(os.path.realpath(__file__).encode("utf8"))
addr = self.syms.resolve_name(os.path.join(script_dir, b'dummy'),
self.mangled_name)
self.assertEqual(addr, self.addr)
pass
......@@ -82,8 +84,8 @@ class TestDebuglink(Harness):
subprocess.check_output('g++ -o dummy dummy.cc'.split())
lines = subprocess.check_output('nm dummy'.split()).splitlines()
for line in lines:
if "some_function" in line:
self.mangled_name = line.split(' ')[2]
if b"some_function" in line:
self.mangled_name = line.split(b' ')[2]
break
self.assertTrue(self.mangled_name)
......@@ -108,8 +110,8 @@ class TestBuildid(Harness):
.split())
lines = subprocess.check_output('nm dummy'.split()).splitlines()
for line in lines:
if "some_function" in line:
self.mangled_name = line.split(' ')[2]
if b"some_function" in line:
self.mangled_name = line.split(b' ')[2]
break
self.assertTrue(self.mangled_name)
......
......@@ -2,7 +2,7 @@
# Copyright (c) Suchakra Sharma <suchakrapani.sharma@polymtl.ca>
# Licensed under the Apache License, Version 2.0 (the "License")
from bcc import BPF, _get_num_open_probes
from bcc import BPF, _get_num_open_probes, TRACEFS
import os
import sys
from unittest import main, TestCase
......@@ -18,9 +18,9 @@ class TestKprobeCnt(TestCase):
def test_attach1(self):
actual_cnt = 0
with open("/sys/kernel/debug/tracing/available_filter_functions") as f:
with open("%s/available_filter_functions" % TRACEFS, "rb") as f:
for line in f:
if str(line).startswith("vfs_"):
if line.startswith(b"vfs_"):
actual_cnt += 1
open_cnt = self.b.num_open_kprobes()
self.assertEqual(actual_cnt, open_cnt)
......
......@@ -47,7 +47,7 @@ int kprobe__htab_map_lookup_elem(struct pt_regs *ctx, struct bpf_map *map, u64 *
stackid = stack_entries[k]
self.assertIsNotNone(stackid)
stack = stack_traces[stackid].ip
self.assertEqual(b.ksym(stack[0]), "htab_map_lookup_elem")
self.assertEqual(b.ksym(stack[0]), b"htab_map_lookup_elem")
if __name__ == "__main__":
......
......@@ -52,12 +52,12 @@ class SmokeTests(TestCase):
or (rc == 137 and kill), "rc was %d" % rc)
def kmod_loaded(self, mod):
mods = open("/proc/modules", "r")
reg = re.compile("^%s\s" % mod)
for line in mods:
if reg.match(line):
return 1
return 0
with open("/proc/modules", "r") as mods:
reg = re.compile("^%s\s" % mod)
for line in mods:
if reg.match(line):
return 1
return 0
def setUp(self):
pass
......
......@@ -138,16 +138,16 @@ while 1:
counts = b.get_table("counts")
for k, v in sorted(counts.items(), key=lambda counts: counts[1].value):
if re.match('mark_page_accessed', b.ksym(k.ip)) is not None:
if re.match(b'mark_page_accessed', b.ksym(k.ip)) is not None:
mpa = max(0, v.value)
if re.match('mark_buffer_dirty', b.ksym(k.ip)) is not None:
if re.match(b'mark_buffer_dirty', b.ksym(k.ip)) is not None:
mbd = max(0, v.value)
if re.match('add_to_page_cache_lru', b.ksym(k.ip)) is not None:
if re.match(b'add_to_page_cache_lru', b.ksym(k.ip)) is not None:
apcl = max(0, v.value)
if re.match('account_page_dirtied', b.ksym(k.ip)) is not None:
if re.match(b'account_page_dirtied', b.ksym(k.ip)) is not None:
apd = max(0, v.value)
# access = total cache access incl. reads(mpa) and writes(mbd)
......
......@@ -18,6 +18,8 @@
from __future__ import print_function
from bcc import BPF
from bcc.utils import ArgString, printb
import bcc.utils as utils
import argparse
import ctypes as ct
import re
......@@ -41,8 +43,10 @@ parser.add_argument("-t", "--timestamp", action="store_true",
parser.add_argument("-x", "--fails", action="store_true",
help="include failed exec()s")
parser.add_argument("-n", "--name",
type=ArgString,
help="only print commands matching this name (regex), any arg")
parser.add_argument("-l", "--line",
type=ArgString,
help="only print commands where arg contains this line (regex)")
parser.add_argument("--max-args", default="20",
help="maximum number of arguments parsed and displayed, defaults to 20")
......@@ -186,19 +190,19 @@ def print_event(cpu, data, size):
elif event.type == EventType.EVENT_RET:
if event.retval != 0 and not args.fails:
skip = True
if args.name and not re.search(args.name, event.comm):
if args.name and not re.search(bytes(args.name), event.comm):
skip = True
if args.line and not re.search(args.line,
b' '.join(argv[event.pid]).decode()):
if args.line and not re.search(bytes(args.line),
b' '.join(argv[event.pid])):
skip = True
if not skip:
if args.timestamp:
print("%-8.3f" % (time.time() - start_ts), end="")
ppid = get_ppid(event.pid)
print("%-16s %-6s %-6s %3s %s" % (event.comm.decode(), event.pid,
ppid if ppid > 0 else "?", event.retval,
b' '.join(argv[event.pid]).decode()))
ppid = b"%d" % ppid if ppid > 0 else b"?"
printb(b"%-16s %-6d %-6s %3d %s" % (event.comm, event.pid,
ppid, event.retval, b' '.join(argv[event.pid])))
try:
del(argv[event.pid])
except Exception:
......
......@@ -16,7 +16,7 @@
# 18-Oct-2016 Sasha Goldshtein Generalized for uprobes, tracepoints, USDT.
from __future__ import print_function
from bcc import BPF, USDT
from bcc import ArgString, BPF, USDT
from time import sleep, strftime
import argparse
import os
......@@ -49,15 +49,15 @@ class Probe(object):
t:cat:event -- probe a kernel tracepoint
u:lib:probe -- probe a USDT tracepoint
"""
parts = pattern.split(':')
parts = bytes(pattern).split(b':')
if len(parts) == 1:
parts = ["p", "", parts[0]]
parts = [b"p", b"", parts[0]]
elif len(parts) == 2:
parts = ["p", parts[0], parts[1]]
parts = [b"p", parts[0], parts[1]]
elif len(parts) == 3:
if parts[0] == "t":
parts = ["t", "", "%s:%s" % tuple(parts[1:])]
if parts[0] not in ["p", "t", "u"]:
if parts[0] == b"t":
parts = [b"t", b"", b"%s:%s" % tuple(parts[1:])]
if parts[0] not in [b"p", b"t", b"u"]:
raise Exception("Type must be 'p', 't', or 'u', but got %s" %
parts[0])
else:
......@@ -66,10 +66,10 @@ class Probe(object):
(self.type, self.library, self.pattern) = parts
if not use_regex:
self.pattern = self.pattern.replace('*', '.*')
self.pattern = '^' + self.pattern + '$'
self.pattern = self.pattern.replace(b'*', b'.*')
self.pattern = b'^' + self.pattern + b'$'
if (self.type == "p" and self.library) or self.type == "u":
if (self.type == b"p" and self.library) or self.type == b"u":
libpath = BPF.find_library(self.library)
if libpath is None:
# This might be an executable (e.g. 'bash')
......@@ -83,46 +83,46 @@ class Probe(object):
self.trace_functions = {} # map location number to function name
def is_kernel_probe(self):
return self.type == "t" or (self.type == "p" and self.library == "")
return self.type == b"t" or (self.type == b"p" and self.library == b"")
def attach(self):
if self.type == "p" and not self.library:
if self.type == b"p" and not self.library:
for index, function in self.trace_functions.items():
self.bpf.attach_kprobe(
event=function,
fn_name="trace_count_%d" % index)
elif self.type == "p" and self.library:
elif self.type == b"p" and self.library:
for index, function in self.trace_functions.items():
self.bpf.attach_uprobe(
name=self.library,
sym=function,
fn_name="trace_count_%d" % index,
pid=self.pid or -1)
elif self.type == "t":
elif self.type == b"t":
for index, function in self.trace_functions.items():
self.bpf.attach_tracepoint(
tp=function,
fn_name="trace_count_%d" % index)
elif self.type == "u":
elif self.type == b"u":
pass # Nothing to do -- attach already happened in `load`
def _add_function(self, template, probe_name):
new_func = "trace_count_%d" % self.matched
text = template.replace("PROBE_FUNCTION", new_func)
text = text.replace("LOCATION", str(self.matched))
new_func = b"trace_count_%d" % self.matched
text = template.replace(b"PROBE_FUNCTION", new_func)
text = text.replace(b"LOCATION", b"%d" % self.matched)
self.trace_functions[self.matched] = probe_name
self.matched += 1
return text
def _generate_functions(self, template):
self.usdt = None
text = ""
if self.type == "p" and not self.library:
text = b""
if self.type == b"p" and not self.library:
functions = BPF.get_kprobe_functions(self.pattern)
verify_limit(len(functions))
for function in functions:
text += self._add_function(template, function)
elif self.type == "p" and self.library:
elif self.type == b"p" and self.library:
# uprobes are tricky because the same function may have multiple
# addresses, and the same address may be mapped to multiple
# functions. We aren't allowed to create more than one uprobe
......@@ -139,12 +139,12 @@ class Probe(object):
addresses.add(address)
functions.add(function)
text += self._add_function(template, function)
elif self.type == "t":
elif self.type == b"t":
tracepoints = BPF.get_tracepoints(self.pattern)
verify_limit(len(tracepoints))
for tracepoint in tracepoints:
text += self._add_function(template, tracepoint)
elif self.type == "u":
elif self.type == b"u":
self.usdt = USDT(path=self.library, pid=self.pid)
matches = []
for probe in self.usdt.enumerate_probes():
......@@ -154,7 +154,7 @@ class Probe(object):
matches.append(probe.name)
verify_limit(len(matches))
for match in matches:
new_func = "trace_count_%d" % self.matched
new_func = b"trace_count_%d" % self.matched
text += self._add_function(template, match)
self.usdt.enable_probe(match, new_func)
if debug:
......@@ -162,7 +162,7 @@ class Probe(object):
return text
def load(self):
trace_count_text = """
trace_count_text = b"""
int PROBE_FUNCTION(void *ctx) {
FILTER
int loc = LOCATION;
......@@ -174,7 +174,7 @@ int PROBE_FUNCTION(void *ctx) {
return 0;
}
"""
bpf_text = """#include <uapi/linux/ptrace.h>
bpf_text = b"""#include <uapi/linux/ptrace.h>
BPF_ARRAY(counts, u64, NUMLOCATIONS);
"""
......@@ -182,15 +182,15 @@ BPF_ARRAY(counts, u64, NUMLOCATIONS);
# We really mean the tgid from the kernel's perspective, which is in
# the top 32 bits of bpf_get_current_pid_tgid().
if self.pid:
trace_count_text = trace_count_text.replace('FILTER',
"""u32 pid = bpf_get_current_pid_tgid() >> 32;
trace_count_text = trace_count_text.replace(b'FILTER',
b"""u32 pid = bpf_get_current_pid_tgid() >> 32;
if (pid != %d) { return 0; }""" % self.pid)
else:
trace_count_text = trace_count_text.replace('FILTER', '')
trace_count_text = trace_count_text.replace(b'FILTER', b'')
bpf_text += self._generate_functions(trace_count_text)
bpf_text = bpf_text.replace("NUMLOCATIONS",
str(len(self.trace_functions)))
bpf_text = bpf_text.replace(b"NUMLOCATIONS",
b"%d" % len(self.trace_functions))
if debug:
print(bpf_text)
......@@ -242,6 +242,7 @@ class Tool(object):
parser.add_argument("-D", "--debug", action="store_true",
help="print BPF program before starting (for debugging purposes)")
parser.add_argument("pattern",
type=ArgString,
help="search expression for events")
self.args = parser.parse_args()
global debug
......@@ -260,7 +261,7 @@ class Tool(object):
self.probe.load()
self.probe.attach()
print("Tracing %d functions for \"%s\"... Hit Ctrl-C to end." %
(self.probe.matched, self.args.pattern))
(self.probe.matched, bytes(self.args.pattern)))
exiting = 0 if self.args.interval else 1
seconds = 0
while True:
......
......@@ -471,7 +471,7 @@ def print_outstanding():
key=lambda a: a.size)[-top_stacks:]
for alloc in to_show:
print("\t%d bytes in %d allocations from stack\n\t\t%s" %
(alloc.size, alloc.count, "\n\t\t".join(alloc.stack)))
(alloc.size, alloc.count, b"\n\t\t".join(alloc.stack)))
def print_outstanding_combined():
stack_traces = bpf["stack_traces"]
......
......@@ -17,6 +17,7 @@
from __future__ import print_function
from bcc import BPF
from bcc.utils import printb
from time import sleep, strftime
import argparse
import signal
......@@ -124,7 +125,7 @@ while 1:
line = 0
for k, v in reversed(sorted(counts.items(),
key=lambda counts: counts[1].size)):
print("%-32s %6d %10d" % (k.name.decode(), v.count, v.size))
printb(b"%-32s %6d %10d" % (k.name, v.count, v.size))
line += 1
if line >= maxrows:
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment