Commit 1a6b1e0c authored by Kevin Modzelewski's avatar Kevin Modzelewski

Add scratch space to our runtime ics

(scratch space is pre-allocated stack space since the IC can't
allocate new space itself, in order to have unwinding work)

Allocate a fixed amount (currently: 40 bytes) of extra stack space
in our runtime ics.  This involves changing the function prologue+
epilogue to do more rsp adjustment, and modifying the .eh_frame sections
we generate.

One tricky thing is that we currently use frame pointer elimination
in our runtime ics, but the rest of the scratch space logic had assumed
the scratch would be rbp-relative, which I had to convert to rsp-relative.
parent c88e3b48
...@@ -95,19 +95,22 @@ void ICSlotRewrite::commit(CommitHook* hook) { ...@@ -95,19 +95,22 @@ void ICSlotRewrite::commit(CommitHook* hook) {
if (ic_entry == NULL) if (ic_entry == NULL)
return; return;
for (int i = 0; i < dependencies.size(); i++) {
ICInvalidator* invalidator = dependencies[i].first;
invalidator->addDependent(ic_entry);
}
uint8_t* slot_start = (uint8_t*)ic->start_addr + ic_entry->idx * ic->getSlotSize(); uint8_t* slot_start = (uint8_t*)ic->start_addr + ic_entry->idx * ic->getSlotSize();
uint8_t* continue_point = (uint8_t*)ic->continue_addr; uint8_t* continue_point = (uint8_t*)ic->continue_addr;
hook->finishAssembly(ic_entry, continue_point - slot_start); bool do_commit = hook->finishAssembly(ic_entry, continue_point - slot_start);
if (!do_commit)
return;
assert(assembler->isExactlyFull()); assert(assembler->isExactlyFull());
assert(!assembler->hasFailed()); assert(!assembler->hasFailed());
for (int i = 0; i < dependencies.size(); i++) {
ICInvalidator* invalidator = dependencies[i].first;
invalidator->addDependent(ic_entry);
}
// if (VERBOSITY()) printf("Commiting to %p-%p\n", start, start + ic->slot_size); // if (VERBOSITY()) printf("Commiting to %p-%p\n", start, start + ic->slot_size);
memcpy(slot_start, buf, ic->getSlotSize()); memcpy(slot_start, buf, ic->getSlotSize());
...@@ -129,18 +132,13 @@ int ICSlotRewrite::getSlotSize() { ...@@ -129,18 +132,13 @@ int ICSlotRewrite::getSlotSize() {
return ic->getSlotSize(); return ic->getSlotSize();
} }
int ICSlotRewrite::getFuncStackSize() { int ICSlotRewrite::getScratchRspOffset() {
return ic->stack_info.stack_size; assert(ic->stack_info.scratch_size);
} return ic->stack_info.scratch_rsp_offset;
int ICSlotRewrite::getScratchRbpOffset() {
assert(ic->stack_info.scratch_bytes);
return ic->stack_info.scratch_rbp_offset;
} }
int ICSlotRewrite::getScratchBytes() { int ICSlotRewrite::getScratchSize() {
assert(ic->stack_info.scratch_bytes); return ic->stack_info.scratch_size;
return ic->stack_info.scratch_bytes;
} }
TypeRecorder* ICSlotRewrite::getTypeRecorder() { TypeRecorder* ICSlotRewrite::getTypeRecorder() {
......
...@@ -48,7 +48,7 @@ public: ...@@ -48,7 +48,7 @@ public:
class CommitHook { class CommitHook {
public: public:
virtual ~CommitHook() {} virtual ~CommitHook() {}
virtual void finishAssembly(ICSlotInfo* picked_slot, int fastpath_offset) = 0; virtual bool finishAssembly(ICSlotInfo* picked_slot, int fastpath_offset) = 0;
}; };
private: private:
...@@ -67,9 +67,8 @@ public: ...@@ -67,9 +67,8 @@ public:
assembler::Assembler* getAssembler() { return assembler; } assembler::Assembler* getAssembler() { return assembler; }
int getSlotSize(); int getSlotSize();
int getFuncStackSize(); int getScratchRspOffset();
int getScratchRbpOffset(); int getScratchSize();
int getScratchBytes();
TypeRecorder* getTypeRecorder(); TypeRecorder* getTypeRecorder();
......
...@@ -952,18 +952,20 @@ void Rewriter::commit() { ...@@ -952,18 +952,20 @@ void Rewriter::commit() {
return; return;
} }
rewrite->commit(this);
if (assembler->hasFailed()) {
on_assemblyfail();
return;
}
finished = true; finished = true;
static StatCounter rewriter_commits("rewriter_commits"); static StatCounter rewriter_commits("rewriter_commits");
rewriter_commits.log(); rewriter_commits.log();
// TODO: have to check that we have enough room to write the final jmp
rewrite->commit(this);
assert(!assembler->hasFailed());
} }
void Rewriter::finishAssembly(ICSlotInfo* picked_slot, int continue_offset) { bool Rewriter::finishAssembly(ICSlotInfo* picked_slot, int continue_offset) {
if (marked_inside_ic) { if (marked_inside_ic) {
void* mark_addr = &picked_slot->num_inside; void* mark_addr = &picked_slot->num_inside;
...@@ -977,6 +979,8 @@ void Rewriter::finishAssembly(ICSlotInfo* picked_slot, int continue_offset) { ...@@ -977,6 +979,8 @@ void Rewriter::finishAssembly(ICSlotInfo* picked_slot, int continue_offset) {
assembler->jmp(assembler::JumpDestination::fromStart(continue_offset)); assembler->jmp(assembler::JumpDestination::fromStart(continue_offset));
assembler->fillWithNops(); assembler->fillWithNops();
return !assembler->hasFailed();
} }
void Rewriter::commitReturning(RewriterVar* var) { void Rewriter::commitReturning(RewriterVar* var) {
...@@ -995,14 +999,14 @@ void Rewriter::addDependenceOn(ICInvalidator& invalidator) { ...@@ -995,14 +999,14 @@ void Rewriter::addDependenceOn(ICInvalidator& invalidator) {
Location Rewriter::allocScratch() { Location Rewriter::allocScratch() {
assertPhaseEmitting(); assertPhaseEmitting();
int scratch_bytes = rewrite->getScratchBytes(); int scratch_size = rewrite->getScratchSize();
for (int i = 0; i < scratch_bytes; i += 8) { for (int i = 0; i < scratch_size; i += 8) {
Location l(Location::Scratch, i); Location l(Location::Scratch, i);
if (vars_by_location.count(l) == 0) { if (vars_by_location.count(l) == 0) {
return l; return l;
} }
} }
RELEASE_ASSERT(0, "Using all %d bytes of scratch!", scratch_bytes); RELEASE_ASSERT(0, "Using all %d bytes of scratch!", scratch_size);
} }
RewriterVar* Rewriter::add(RewriterVar* a, int64_t b, Location dest) { RewriterVar* Rewriter::add(RewriterVar* a, int64_t b, Location dest) {
...@@ -1042,9 +1046,9 @@ RewriterVar* Rewriter::allocate(int n) { ...@@ -1042,9 +1046,9 @@ RewriterVar* Rewriter::allocate(int n) {
int Rewriter::_allocate(RewriterVar* result, int n) { int Rewriter::_allocate(RewriterVar* result, int n) {
assert(n >= 1); assert(n >= 1);
int scratch_bytes = rewrite->getScratchBytes(); int scratch_size = rewrite->getScratchSize();
int consec = 0; int consec = 0;
for (int i = 0; i < scratch_bytes; i += 8) { for (int i = 0; i < scratch_size; i += 8) {
Location l(Location::Scratch, i); Location l(Location::Scratch, i);
if (vars_by_location.count(l) == 0) { if (vars_by_location.count(l) == 0) {
consec++; consec++;
...@@ -1056,8 +1060,8 @@ int Rewriter::_allocate(RewriterVar* result, int n) { ...@@ -1056,8 +1060,8 @@ int Rewriter::_allocate(RewriterVar* result, int n) {
// TODO should be a LEA instruction // TODO should be a LEA instruction
// In fact, we could do something like we do for constants and only load // In fact, we could do something like we do for constants and only load
// this when necessary, so it won't spill. Is that worth? // this when necessary, so it won't spill. Is that worth?
assembler->mov(assembler::RBP, r); assembler->mov(assembler::RSP, r);
assembler->add(assembler::Immediate(8 * a + rewrite->getScratchRbpOffset()), r); assembler->add(assembler::Immediate(8 * a + rewrite->getScratchRspOffset()), r);
// Put placeholders in so the array space doesn't get re-allocated. // Put placeholders in so the array space doesn't get re-allocated.
// This won't get collected, but that's fine. // This won't get collected, but that's fine.
...@@ -1074,7 +1078,7 @@ int Rewriter::_allocate(RewriterVar* result, int n) { ...@@ -1074,7 +1078,7 @@ int Rewriter::_allocate(RewriterVar* result, int n) {
consec = 0; consec = 0;
} }
} }
RELEASE_ASSERT(0, "Using all %d bytes of scratch!", scratch_bytes); RELEASE_ASSERT(0, "Using all %d bytes of scratch!", scratch_size);
} }
RewriterVar* Rewriter::allocateAndCopy(RewriterVar* array_ptr, int n) { RewriterVar* Rewriter::allocateAndCopy(RewriterVar* array_ptr, int n) {
...@@ -1094,7 +1098,7 @@ void Rewriter::_allocateAndCopy(RewriterVar* result, RewriterVar* array_ptr, int ...@@ -1094,7 +1098,7 @@ void Rewriter::_allocateAndCopy(RewriterVar* result, RewriterVar* array_ptr, int
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
assembler->mov(assembler::Indirect(src_ptr, 8 * i), tmp); assembler->mov(assembler::Indirect(src_ptr, 8 * i), tmp);
assembler->mov(tmp, assembler::Indirect(assembler::RBP, 8 * (offset + i) + rewrite->getScratchRbpOffset())); assembler->mov(tmp, assembler::Indirect(assembler::RSP, 8 * (offset + i) + rewrite->getScratchRspOffset()));
} }
array_ptr->bumpUse(); array_ptr->bumpUse();
...@@ -1121,7 +1125,7 @@ void Rewriter::_allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_ele ...@@ -1121,7 +1125,7 @@ void Rewriter::_allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_ele
int offset = _allocate(result, n_rest + 1); int offset = _allocate(result, n_rest + 1);
assembler::Register tmp = first_elem->getInReg(); assembler::Register tmp = first_elem->getInReg();
assembler->mov(tmp, assembler::Indirect(assembler::RBP, 8 * offset + rewrite->getScratchRbpOffset())); assembler->mov(tmp, assembler::Indirect(assembler::RSP, 8 * offset + rewrite->getScratchRspOffset()));
if (n_rest > 0) { if (n_rest > 0) {
assembler::Register src_ptr = rest_ptr->getInReg(); assembler::Register src_ptr = rest_ptr->getInReg();
...@@ -1131,7 +1135,7 @@ void Rewriter::_allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_ele ...@@ -1131,7 +1135,7 @@ void Rewriter::_allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_ele
for (int i = 0; i < n_rest; i++) { for (int i = 0; i < n_rest; i++) {
assembler->mov(assembler::Indirect(src_ptr, 8 * i), tmp); assembler->mov(assembler::Indirect(src_ptr, 8 * i), tmp);
assembler->mov(tmp, assembler->mov(tmp,
assembler::Indirect(assembler::RBP, 8 * (offset + i + 1) + rewrite->getScratchRbpOffset())); assembler::Indirect(assembler::RSP, 8 * (offset + i + 1) + rewrite->getScratchRspOffset()));
} }
rest_ptr->bumpUse(); rest_ptr->bumpUse();
} }
...@@ -1146,8 +1150,7 @@ assembler::Indirect Rewriter::indirectFor(Location l) { ...@@ -1146,8 +1150,7 @@ assembler::Indirect Rewriter::indirectFor(Location l) {
assert(l.type == Location::Scratch || l.type == Location::Stack); assert(l.type == Location::Scratch || l.type == Location::Stack);
if (l.type == Location::Scratch) if (l.type == Location::Scratch)
// TODO it can sometimes be more efficient to do RSP-relative addressing? return assembler::Indirect(assembler::RSP, rewrite->getScratchRspOffset() + l.scratch_offset);
return assembler::Indirect(assembler::RBP, rewrite->getScratchRbpOffset() + l.scratch_offset);
else else
return assembler::Indirect(assembler::RSP, l.stack_offset); return assembler::Indirect(assembler::RSP, l.stack_offset);
} }
...@@ -1414,6 +1417,18 @@ Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& ...@@ -1414,6 +1417,18 @@ Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>&
this->live_outs.push_back(var); this->live_outs.push_back(var);
this->live_out_regs.push_back(dwarf_regnum); this->live_out_regs.push_back(dwarf_regnum);
} }
// Getting the scratch space location/size wrong could be disastrous and hard to track down,
// so here's a "forcefully check it" mode, which starts every inline cache by overwriting
// the entire scratch space.
bool VALIDATE_SCRATCH_SPACE = false;
if (VALIDATE_SCRATCH_SPACE) {
int scratch_size = rewrite->getScratchSize();
for (int i = 0; i < scratch_size; i += 8) {
assembler->movq(assembler::Immediate(0x12345678UL),
assembler::Indirect(assembler::RSP, i + rewrite->getScratchRspOffset()));
}
}
} }
Rewriter* Rewriter::createRewriter(void* rtn_addr, int num_args, const char* debug_name) { Rewriter* Rewriter::createRewriter(void* rtn_addr, int num_args, const char* debug_name) {
......
...@@ -384,7 +384,7 @@ private: ...@@ -384,7 +384,7 @@ private:
// Do the bookkeeping to say that var is no longer in location l // Do the bookkeeping to say that var is no longer in location l
void removeLocationFromVar(RewriterVar* var, Location l); void removeLocationFromVar(RewriterVar* var, Location l);
void finishAssembly(ICSlotInfo* picked_slot, int continue_offset) override; bool finishAssembly(ICSlotInfo* picked_slot, int continue_offset) override;
void _trap(); void _trap();
void _loadConst(RewriterVar* result, int64_t val, Location loc); void _loadConst(RewriterVar* result, int64_t val, Location loc);
......
...@@ -15,13 +15,18 @@ ...@@ -15,13 +15,18 @@
#ifndef PYSTON_ASMWRITING_TYPES_H #ifndef PYSTON_ASMWRITING_TYPES_H
#define PYSTON_ASMWRITING_TYPES_H #define PYSTON_ASMWRITING_TYPES_H
#include "core/common.h"
namespace pyston { namespace pyston {
struct StackInfo { struct StackInfo {
int stack_size; int scratch_size;
int scratch_rsp_offset;
int scratch_bytes; StackInfo(int scratch_size, int scratch_rsp_offset)
int scratch_rbp_offset; : scratch_size(scratch_size), scratch_rsp_offset(scratch_rsp_offset) {
assert(scratch_rsp_offset >= 0);
}
}; };
namespace assembler { namespace assembler {
......
...@@ -386,9 +386,6 @@ extern "C" PyObject* Py_InitModule4(const char* name, PyMethodDef* methods, cons ...@@ -386,9 +386,6 @@ extern "C" PyObject* Py_InitModule4(const char* name, PyMethodDef* methods, cons
Box* passthrough = static_cast<Box*>(self); Box* passthrough = static_cast<Box*>(self);
while (methods && methods->ml_name) { while (methods && methods->ml_name) {
if (VERBOSITY())
printf("Loading method %s\n", methods->ml_name);
RELEASE_ASSERT((methods->ml_flags & (~(METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O))) == 0, "%d", RELEASE_ASSERT((methods->ml_flags & (~(METH_VARARGS | METH_KEYWORDS | METH_NOARGS | METH_O))) == 0, "%d",
methods->ml_flags); methods->ml_flags);
module->giveAttr(methods->ml_name, module->giveAttr(methods->ml_name,
......
...@@ -1810,7 +1810,7 @@ private: ...@@ -1810,7 +1810,7 @@ private:
// Maybe if there are a ton of live variables it'd be nice to have them be // Maybe if there are a ton of live variables it'd be nice to have them be
// heap-allocated, or if we don't immediately return the result of the OSR? // heap-allocated, or if we don't immediately return the result of the OSR?
bool use_malloc = false; bool use_malloc = false;
if (false) { if (use_malloc) {
llvm::Value* n_bytes = getConstantInt((sorted_symbol_table.size() - 3) * sizeof(Box*), g.i64); llvm::Value* n_bytes = getConstantInt((sorted_symbol_table.size() - 3) * sizeof(Box*), g.i64);
llvm::Value* l_malloc = embedConstantPtr( llvm::Value* l_malloc = embedConstantPtr(
(void*)malloc, llvm::FunctionType::get(g.i8->getPointerTo(), g.i64, false)->getPointerTo()); (void*)malloc, llvm::FunctionType::get(g.i8->getPointerTo(), g.i64, false)->getPointerTo());
...@@ -1818,7 +1818,10 @@ private: ...@@ -1818,7 +1818,10 @@ private:
arg_array = emitter.getBuilder()->CreateBitCast(malloc_save, g.llvm_value_type_ptr->getPointerTo()); arg_array = emitter.getBuilder()->CreateBitCast(malloc_save, g.llvm_value_type_ptr->getPointerTo());
} else { } else {
llvm::Value* n_varargs = llvm::ConstantInt::get(g.i64, sorted_symbol_table.size() - 3, false); llvm::Value* n_varargs = llvm::ConstantInt::get(g.i64, sorted_symbol_table.size() - 3, false);
arg_array = emitter.getBuilder()->CreateAlloca(g.llvm_value_type_ptr, n_varargs); // TODO we have a number of allocas with non-overlapping lifetimes, that end up
// being redundant.
arg_array = new llvm::AllocaInst(g.llvm_value_type_ptr, n_varargs, "",
irstate->getLLVMFunction()->getEntryBlock().getFirstInsertionPt());
} }
} }
......
...@@ -248,9 +248,22 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) { ...@@ -248,9 +248,22 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
assert(pp->numICStackmapArgs() == 0); // don't do anything with these for now assert(pp->numICStackmapArgs() == 0); // don't do anything with these for now
std::unique_ptr<ICInfo> icinfo = registerCompiledPatchpoint( // We currently specify the scratch's location as an RSP offset, but LLVM gives it
start_addr, slowpath_start, end_addr, slowpath_rtn_addr, ic, // to us as an RBP offset. It's easy to convert between them if the function has a static
StackInfo({ stack_size, scratch_size, scratch_rbp_offset }), std::move(live_outs)); // stack size, but if the function doesn't have a fixed stack size (which happens if there
// is a non-static alloca), then we can't convert.
// Internally, it's easy enough to handle either rsp-relative or rbp-relative offsets
// for the scratch array, but there are some places that require the use of rsp-relative
// offsets, and we don't (yet) have the ability to specify on a per-patchpoint basis
// which one we want to use.
RELEASE_ASSERT(stack_size >= 0, "function does not have static stack size!");
// (rbp - rsp) == (stack_size - 8) -- the "-8" is from the value of rbp being pushed onto the stack
int scratch_rsp_offset = scratch_rbp_offset + (stack_size - 8);
std::unique_ptr<ICInfo> icinfo
= registerCompiledPatchpoint(start_addr, slowpath_start, end_addr, slowpath_rtn_addr, ic,
StackInfo(scratch_size, scratch_rsp_offset), std::move(live_outs));
assert(cf); assert(cf);
// TODO: unsafe. hard to use a unique_ptr here though. // TODO: unsafe. hard to use a unique_ptr here though.
......
...@@ -522,8 +522,9 @@ ExcInfo* getFrameExcInfo() { ...@@ -522,8 +522,9 @@ ExcInfo* getFrameExcInfo() {
*copy_from_exc = ExcInfo(None, None, None); *copy_from_exc = ExcInfo(None, None, None);
} }
assert(copy_from_exc->value); assert(gc::isValidGCObject(copy_from_exc->type));
assert(copy_from_exc->traceback); assert(gc::isValidGCObject(copy_from_exc->value));
assert(gc::isValidGCObject(copy_from_exc->traceback));
for (auto* ex : to_update) { for (auto* ex : to_update) {
*ex = *copy_from_exc; *ex = *copy_from_exc;
......
...@@ -42,25 +42,29 @@ namespace pyston { ...@@ -42,25 +42,29 @@ namespace pyston {
// //
// This template is generated from this C++ file: // This template is generated from this C++ file:
// //
// extern void foo(); // extern void foo(void*);
// int bar() { // int bar() {
// foo(); // char buf[N];
// foo(&buf);
// return 1; // return 1;
// } // }
// //
// (where N is the extra bytes of stack to allocate)
//
// objdump -s -j .eh_frame test // objdump -s -j .eh_frame test
// readelf -w test // readelf -w test
// //
#if RUNTIMEICS_OMIT_FRAME_PTR #if RUNTIMEICS_OMIT_FRAME_PTR
// clang++ test.cpp -o test -O3 -fomit-frame-pointer -c // clang++ test.cpp -o test -O3 -fomit-frame-pointer -c -DN=40
// The generated assembly is: // The generated assembly is:
// //
// 0: 50 push %rax // 0: 48 83 ec 28 sub $0x28,%rsp
// 1: e8 00 00 00 00 callq 6 <_Z3barv+0x6> // 4: 48 8d 3c 24 lea (%rsp),%rdi
// 6: b8 01 00 00 00 mov $0x1,%eax // 8: e8 00 00 00 00 callq d <_Z3barv+0xd>
// b: 5a pop %rdx // d: b8 01 00 00 00 mov $0x1,%eax
// c: c3 retq // 12: 48 83 c4 28 add $0x28,%rsp
// 16: c3 retq
// //
// (I believe the push/pop are for stack alignment) // (I believe the push/pop are for stack alignment)
// //
...@@ -84,24 +88,26 @@ static const char _eh_frame_template[] = ...@@ -84,24 +88,26 @@ static const char _eh_frame_template[] =
"\x00\x00\x00\x00" // pcrel offset to function address [to be filled in] "\x00\x00\x00\x00" // pcrel offset to function address [to be filled in]
"\x0d\x00\x00\x00" // function size [to be filled in] "\x0d\x00\x00\x00" // function size [to be filled in]
"\x00" // augmentation data (none) "\x00" // augmentation data (none)
"\x41\x0e\x10" "\x44\x0e\x30"
// Instructions: // Instructions:
// - DW_CFA_advance_loc: 1 to 00000001 // - DW_CFA_advance_loc: 4 to 00000004
// - DW_CFA_def_cfa_offset: 16 // - DW_CFA_def_cfa_offset: 48
"\x00\x00\x00\x00" // padding "\x00\x00\x00\x00" // padding
"\x00\x00\x00\x00" // terminator "\x00\x00\x00\x00" // terminator
; ;
#else #else
// clang++ test.cpp -o test -O3 -fno-omit-frame-pointer -c // clang++ test.cpp -o test -O3 -fno-omit-frame-pointer -c -DN=40
// The generated assembly is: // The generated assembly is:
//
// 0: 55 push %rbp // 0: 55 push %rbp
// 1: 48 89 e5 mov %rsp,%rbp // 1: 48 89 e5 mov %rsp,%rbp
// 4: e8 00 00 00 00 callq 9 <_Z3barv+0x9> // 4: 48 83 ec 30 sub $0x30,%rsp
// 9: b8 01 00 00 00 mov $0x1,%eax // 8: 48 8d 7d d0 lea -0x30(%rbp),%rdi
// e: 5d pop %rbp // c: e8 00 00 00 00 callq 11 <_Z3barv+0x11>
// f: c3 retq // 11: b8 01 00 00 00 mov $0x1,%eax
// 16: 48 83 c4 30 add $0x30,%rsp
// 1a: 5d pop %rbp
// 1b: c3 retq
// //
static const char _eh_frame_template[] = static const char _eh_frame_template[] =
// CIE // CIE
...@@ -152,31 +158,53 @@ static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t f ...@@ -152,31 +158,53 @@ static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t f
*size_ptr = func_size; *size_ptr = func_size;
} }
#if RUNTIMEICS_OMIT_FRAME_PTR
// If you change this, you *must* update the value in _eh_frame_template
// (set the -9'th byte to this value plus 8)
#define SCRATCH_BYTES 0x28
#else
#define SCRATCH_BYTES 0x30
#endif
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) { RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) {
static StatCounter sc("runtime_ics_num"); static StatCounter sc("runtime_ics_num");
sc.log(); sc.log();
if (ENABLE_RUNTIME_ICS) { if (ENABLE_RUNTIME_ICS) {
assert(SCRATCH_BYTES >= 0);
assert(SCRATCH_BYTES < 0x80); // This would break both the instruction encoding and the dwarf encoding
assert(SCRATCH_BYTES % 8 == 0);
#if RUNTIMEICS_OMIT_FRAME_PTR #if RUNTIMEICS_OMIT_FRAME_PTR
static const int PROLOGUE_SIZE = 1;
#else
/* /*
* We emit a prologue since we want to align the stack pointer, * prologue:
* and also use RBP. * sub $0x28, %rsp # 48 83 ec 28
* It's not clear if we need to use RBP or not, since we emit the .eh_frame section anyway. *
* epilogue:
* add $0x28, %rsp # 48 83 c4 28
* retq # c3
* *
*/
static const int PROLOGUE_SIZE = 4;
static const int EPILOGUE_SIZE = 5;
assert(SCRATCH_BYTES % 16 == 8);
#else
/*
* The prologue looks like: * The prologue looks like:
* push %rbp # 55 * push %rbp # 55
* mov %rsp, %rbp # 48 89 e5 * mov %rsp, %rbp # 48 89 e5
* sub $0x30, %rsp # 48 83 ec 30
* *
* The epilogue is: * The epilogue is:
* add $0x30, %rsp # 48 83 c4 30
* pop %rbp # 5d * pop %rbp # 5d
* retq # c3 * retq # c3
*/ */
static const int PROLOGUE_SIZE = 4; static const int PROLOGUE_SIZE = 8;
static const int EPILOGUE_SIZE = 6;
assert(SCRATCH_BYTES % 16 == 0);
#endif #endif
static const int CALL_SIZE = 13; static const int CALL_SIZE = 13;
static const int EPILOGUE_SIZE = 2;
int patchable_size = num_slots * slot_size; int patchable_size = num_slots * slot_size;
int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE; int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;
...@@ -198,23 +226,28 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) { ...@@ -198,23 +226,28 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) {
assert(p.first == pp_start + patchable_size); assert(p.first == pp_start + patchable_size);
assert(p.second == pp_end); assert(p.second == pp_end);
StackInfo stack_info(SCRATCH_BYTES, 0);
icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(), icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
StackInfo(), std::unordered_set<int>()); stack_info, std::unordered_set<int>());
assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE); assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR #if RUNTIMEICS_OMIT_FRAME_PTR
prologue_assem.push(assembler::RAX); // If SCRATCH_BYTES is 8 or less, we could use more compact instruction encodings
// (push instead of sub), but it doesn't seem worth it for now.
prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else #else
prologue_assem.push(assembler::RBP); prologue_assem.push(assembler::RBP);
prologue_assem.mov(assembler::RSP, assembler::RBP); prologue_assem.mov(assembler::RSP, assembler::RBP);
prologue_assem.sub(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#endif #endif
assert(!prologue_assem.hasFailed()); assert(!prologue_assem.hasFailed());
assert(prologue_assem.isExactlyFull()); assert(prologue_assem.isExactlyFull());
assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE); assembler::Assembler epilogue_assem(pp_end, EPILOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR #if RUNTIMEICS_OMIT_FRAME_PTR
epilogue_assem.pop(assembler::RDX); epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
#else #else
epilogue_assem.add(assembler::Immediate(SCRATCH_BYTES), assembler::RSP);
epilogue_assem.pop(assembler::RBP); epilogue_assem.pop(assembler::RBP);
#endif #endif
epilogue_assem.retq(); epilogue_assem.retq();
......
...@@ -20,6 +20,8 @@ for i in xrange(400): ...@@ -20,6 +20,8 @@ for i in xrange(400):
while nworkers >= MAX_WORKERS: while nworkers >= MAX_WORKERS:
cv.wait() cv.wait()
nworkers += 1
t = threading.Thread(target=worker) t = threading.Thread(target=worker)
t.start() t.start()
threads.append(t) threads.append(t)
......
# skip-if: True
# - this is blocking on some rewriter stuff
import urlparse import urlparse
print urlparse.urlparse("http://www.dropbox.com") print urlparse.urlparse("http://www.dropbox.com")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment