Commit 3d7cadd3 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #571 from kmod/perf_investigations

Perf investigations
parents bf75c321 5e3f1985
t = (1,)
d = {(1,):1}
for i in xrange(4000000):
d[t]
import sre_compile
import sre_constants
import re
IN = sre_constants.IN
def _identity(x):
return x
_cache = {}
DEBUG = 1
def _compile(*key):
# print "re._compile", key
# internal: compile pattern
pattern, flags = key
bypass_cache = flags & DEBUG
if not bypass_cache:
cachekey = (type(key[0]),) + key
p = _cache.get(cachekey)
if p is not None:
# print "got from cache"
return p
p = (1, 0)
_cache[cachekey] = p
return p
for i in xrange(1000000):
# sre_compile._optimize_charset([('negate', None), ('literal', 34), ('literal', 92)], _identity)
# sre_compile._compile([17, 4, 0, 3, 0, 29, 12, 0, 4294967295L, 15, 7, 26, 19, 34, 19, 92, 0, 1, 28, 0, 0, 4294967295L, 7, 6, 19, 92, 2, 18, 15, 13, 19, 34, 5, 7, 0, 19, 34, 19, 34, 1, 18, 2, 0, 29, 0, 0, 4294967295L], [(IN, [('negate', None), ('literal', 34), ('literal', 92)])], 0)
# sre_compile._compile([17, 8, 3, 1, 1, 1, 1, 97, 0], [('literal', 97)], 0)
_compile("a", 0)
......@@ -790,7 +790,7 @@ public:
}
}
if (VERBOSITY("types")) {
if (VERBOSITY("types") >= 2) {
printf("Type analysis: %d BBs, %d evaluations = %.1f evaluations/block\n", starting_types.size(),
num_evaluations, 1.0 * num_evaluations / starting_types.size());
}
......
......@@ -135,12 +135,6 @@ static void compileIR(CompiledFunction* cf, EffortLevel effort) {
assert(cf);
assert(cf->func);
// g.engine->finalizeOBject();
if (VERBOSITY("irgen") >= 1) {
printf("Compiling...\n");
// g.cur_module->dump();
}
void* compiled = NULL;
cf->code = NULL;
if (effort > EffortLevel::INTERPRETED) {
......@@ -195,12 +189,20 @@ CompiledFunction* compileFunction(CLFunction* f, FunctionSpecialization* spec, E
ASSERT(f->versions.size() < 20, "%s %ld", name.c_str(), f->versions.size());
if (VERBOSITY("irgen") >= 1) {
if (VERBOSITY("irgen") >= 2 || (VERBOSITY("irgen") == 1 && effort > EffortLevel::INTERPRETED)) {
std::string s;
llvm::raw_string_ostream ss(s);
const char* colors[] = {
"30", // grey/black
"34", // blue
"31", // red
"31;40", // red-on-black/grey
};
RELEASE_ASSERT((int)effort < sizeof(colors) / sizeof(colors[0]), "");
if (spec) {
ss << "\033[34;1mJIT'ing " << source->fn << ":" << name << " with signature (";
ss << "\033[" << colors[(int)effort] << ";1mJIT'ing " << source->fn << ":" << name << " with signature (";
for (int i = 0; i < spec->arg_types.size(); i++) {
if (i > 0)
ss << ", ";
......@@ -210,8 +212,8 @@ CompiledFunction* compileFunction(CLFunction* f, FunctionSpecialization* spec, E
ss << ") -> ";
ss << spec->rtn_type->debugName();
} else {
ss << "\033[34;1mDoing OSR-entry partial compile of " << source->fn << ":" << name
<< ", starting with backedge to block " << entry_descriptor->backedge->target->idx;
ss << "\033[" << colors[(int)effort] << ";1mDoing OSR-entry partial compile of " << source->fn << ":"
<< name << ", starting with backedge to block " << entry_descriptor->backedge->target->idx;
}
ss << " at effort level " << (int)effort << '\n';
......@@ -255,7 +257,7 @@ CompiledFunction* compileFunction(CLFunction* f, FunctionSpecialization* spec, E
static StatCounter us_compiling("us_compiling");
us_compiling.log(us);
if (VERBOSITY() >= 1 && us > 100000) {
printf("Took %ldms to compile %s::%s!\n", us / 1000, source->fn.c_str(), name.c_str());
printf("Took %ldms to compile %s::%s (effort %d)!\n", us / 1000, source->fn.c_str(), name.c_str(), (int)effort);
}
static StatCounter num_compiles("num_compiles");
......
......@@ -2611,7 +2611,7 @@ public:
#if ENABLE_SAMPLING_PROFILER
auto stmt = block->body[i];
if (stmt->type != AST_TYPE::Assign) // could be a landingpad
if (!(i == 0 && stmt->type == AST_TYPE::Assign) && stmt->lineno > 0) // could be a landingpad
doSafePoint(block->body[i]);
#endif
......
......@@ -174,7 +174,7 @@ public:
continue;
llvm::Function* f = g.func_addr_registry.getLLVMFuncAtAddress((void*)addr);
if (f == NULL) {
if (VERBOSITY()) {
if (VERBOSITY() >= 3) {
printf("Giving up on inlining %s:\n",
g.func_addr_registry.getFuncNameAtAddress((void*)addr, true).c_str());
call->dump();
......@@ -233,16 +233,16 @@ public:
llvm::InlineCost IC = cost_analysis->getInlineCost(cs, threshold);
bool do_inline = false;
if (IC.isAlways()) {
if (VERBOSITY("irgen.inlining") >= 2)
llvm::errs() << "always inline\n";
// if (VERBOSITY("irgen.inlining") >= 2)
// llvm::errs() << "always inline\n";
do_inline = true;
} else if (IC.isNever()) {
if (VERBOSITY("irgen.inlining") >= 2)
llvm::errs() << "never inline\n";
// if (VERBOSITY("irgen.inlining") >= 2)
// llvm::errs() << "never inline\n";
do_inline = false;
} else {
if (VERBOSITY("irgen.inlining") >= 2)
llvm::errs() << "Inline cost: " << IC.getCost() << '\n';
// if (VERBOSITY("irgen.inlining") >= 2)
// llvm::errs() << "Inline cost: " << IC.getCost() << '\n';
do_inline = (bool)IC;
}
......
......@@ -50,14 +50,14 @@ private:
InternedStringPool* intern_pool;
void ensure(int num) {
if (end - start < num) {
if (unlikely(fp) && end - start < num) {
fill();
}
}
public:
void fill() {
if (fp) {
if (unlikely(fp)) {
memmove(buf, buf + start, end - start);
end -= start;
start = 0;
......@@ -77,7 +77,7 @@ public:
ensure(1);
RELEASE_ASSERT(end > start, "premature eof");
if (fp) {
if (unlikely(fp)) {
return buf[start++];
} else {
return data[start++];
......
......@@ -401,7 +401,7 @@ static inline bool find_call_site_entry(const lsda_info_t* info, const uint8_t*
while (p < info->action_table) { // The call site table ends where the action table begins.
p = parse_call_site_entry(p, info, entry);
if (VERBOSITY("cxx_unwind") >= 3) {
if (VERBOSITY("cxx_unwind") >= 5) {
printf(" start %p end %p landingpad %p action %lx\n", entry->instrs_start,
entry->instrs_start + entry->instrs_len_bytes, entry->landing_pad, entry->action_offset_plus_one);
}
......@@ -424,7 +424,7 @@ static inline NORETURN void resume(unw_cursor_t* cursor, const uint8_t* landing_
const ExcData* exc_data) {
exc_data->check();
assert(landing_pad);
if (VERBOSITY("cxx_unwind") >= 2)
if (VERBOSITY("cxx_unwind") >= 4)
printf(" * RESUMED: ip %p switch_value %ld\n", (const void*)landing_pad, (long)switch_value);
if (0 != switch_value) {
......@@ -465,7 +465,7 @@ static inline int64_t determine_action(const lsda_info_t* info, const call_site_
return CLEANUP_ACTION;
// Read a chain of actions.
if (VERBOSITY("cxx_unwind") >= 3) {
if (VERBOSITY("cxx_unwind") >= 5) {
printf(" reading action chain\n");
}
......@@ -477,7 +477,7 @@ static inline int64_t determine_action(const lsda_info_t* info, const call_site_
ptrdiff_t offset = p - info->action_table;
int64_t type_filter;
p = next_action(p, &type_filter);
if (VERBOSITY("cxx_unwind") >= 3) {
if (VERBOSITY("cxx_unwind") >= 5) {
if (p)
printf(" %ld: filter %ld next %ld\n", offset, type_filter, p - info->action_table);
else
......@@ -539,7 +539,7 @@ static inline void unwind_loop(const ExcData* exc_data) {
assert((pip.lsda == 0) == (pip.handler == 0));
assert(pip.flags == 0);
if (VERBOSITY("cxx_unwind") >= 2) {
if (VERBOSITY("cxx_unwind") >= 4) {
print_frame(&cursor, &pip);
}
......@@ -586,7 +586,7 @@ static inline void unwind_loop(const ExcData* exc_data) {
}
// After this point we are guaranteed to resume something rather than unwinding further.
if (VERBOSITY("cxx_unwind") >= 3) {
if (VERBOSITY("cxx_unwind") >= 4) {
print_lsda(&info);
}
......@@ -633,7 +633,7 @@ extern "C" void _Unwind_Resume(struct _Unwind_Exception* _exc) {
#endif
pyston::us_unwind_cleanup.log(pyston::per_thread_cleanup_timer.end());
if (VERBOSITY("cxx_unwind"))
if (VERBOSITY("cxx_unwind") >= 4)
printf("***** _Unwind_Resume() *****\n");
// we give `_exc' type `struct _Unwind_Exception*' because unwind.h demands it; it's not actually accurate
const pyston::ExcData* data = (const pyston::ExcData*)_exc;
......@@ -680,7 +680,7 @@ extern "C" void* __cxa_begin_catch(void* exc_obj_in) noexcept {
assert(exc_obj_in);
pyston::us_unwind_resume_catch.log(pyston::per_thread_resume_catch_timer.end());
if (VERBOSITY("cxx_unwind"))
if (VERBOSITY("cxx_unwind") >= 4)
printf("***** __cxa_begin_catch() *****\n");
pyston::ExcData* e = (pyston::ExcData*)exc_obj_in;
......@@ -689,7 +689,7 @@ extern "C" void* __cxa_begin_catch(void* exc_obj_in) noexcept {
}
extern "C" void __cxa_end_catch() {
if (VERBOSITY("cxx_unwind"))
if (VERBOSITY("cxx_unwind") >= 4)
printf("***** __cxa_end_catch() *****\n");
// See comment in __cxa_begin_catch for why we don't clear the exception ferry here.
}
......@@ -704,7 +704,7 @@ extern "C" void __cxa_throw(void* exc_obj, std::type_info* tinfo, void (*dtor)(v
assert(exc_obj);
RELEASE_ASSERT(tinfo == &EXCINFO_TYPE_INFO, "can't throw a non-ExcInfo value! type info: %p", tinfo);
if (VERBOSITY("cxx_unwind"))
if (VERBOSITY("cxx_unwind") >= 4)
printf("***** __cxa_throw() *****\n");
const pyston::ExcData* exc_data = (const pyston::ExcData*)exc_obj;
......
import argparse
import sys
if __name__ == "__main__":
fn = sys.argv[1]
parser = argparse.ArgumentParser()
parser.add_argument("tracebacks_file", action="store", default=None)
parser.add_argument("--num-display", action="store", default=6, type=int)
parser.add_argument("--dedup-frames", action="store", default=None, type=int)
parser.add_argument("--dedup-file", action="store_true")
parser.add_argument("--dedup-function", action="store_true")
args = parser.parse_args()
num_display = args.num_display
key_func = None
def traceback_locations(t):
return [l for l in t.split('\n') if l.startswith(" File")]
if args.dedup_frames:
assert not key_func
key_func = lambda t: '\n'.join(t.split('\n')[-2 * args.dedup_frames:]) # last 4 stack frames
if args.dedup_file:
assert not key_func
key_func = lambda t: traceback_locations(t)[-1].split('"')[1]
if args.dedup_function:
assert not key_func
def key_func(t):
locations = traceback_locations(t)
last_file = locations[-1].split('"')[1]
last_function = locations[-1].split()[-1][:-1]
return "%s::%s()" % (last_file, last_function)
if not key_func:
key_func = lambda t: t
tracebacks = []
cur_traceback = []
with open(fn) as f:
with open(args.tracebacks_file) as f:
for l in f:
if l.startswith("Traceback"):
if cur_traceback:
......@@ -22,36 +50,24 @@ if __name__ == "__main__":
counts = {}
for t in tracebacks:
locations = [l for l in t.split('\n') if l.startswith(" File")]
last_file = locations[-1].split('"')[1]
last_function = locations[-1].split()[-1][:-1]
# dedupe on:
# key = t # full traceback
# key = '\n'.join(t.split('\n')[-8:]) # last 4 stack frames
# key = '\n'.join(t.split('\n')[-4:]) # last 2 stack frames
# key = '\n'.join(t.split('\n')[-2:]) # last stack frame
# key = last_file, last_function
key = last_file
key = key_func(t)
counts[key] = counts.get(key, 0) + 1
n = len(tracebacks)
NUM_DISPLAY = 6
entries = sorted(counts.items(), key=lambda (k, v): v)
if len(counts) > NUM_DISPLAY:
if len(counts) > num_display:
num_hidden = 0
counts_hidden = 0
for k, v in entries[:-NUM_DISPLAY]:
for k, v in entries[:-num_display]:
num_hidden += 1
counts_hidden += v
print "Hiding %d entries that occurred %d (%.1f%%) times" % (num_hidden, counts_hidden, 100.0 * counts_hidden / n)
for k, v in sorted(counts.items(), key=lambda (k, v): v)[-NUM_DISPLAY:]:
for k, v in sorted(counts.items(), key=lambda (k, v): v)[-num_display:]:
print
print "Occurs %d (%.1f%%) times:" % (v, 100.0 * v / n)
print k
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment