Commit f9021356 authored by Marius Wachtler

baseline jit: patch block transitions to a direct jump.

Previously we emitted a runtime check to determine whether the block had been JITed or whether we had to fall back to the interpreter.
Now we always generate an exit to the interpreter if the block is not yet JITed, and patch the exit to a direct jump
later, once we have successfully generated code for the new block.
This also removes the epilog and replaces it with a direct 'leave; ret' combo, which saves space and an additional jump.
parent 5358470c
......@@ -899,6 +899,7 @@ void Assembler::jmp(JumpDestination dest) {
emitByte(0xeb);
emitByte(offset);
} else {
assert((-1L << 31) <= dest.offset && dest.offset < (1L << 31) - 1);
offset -= 3;
emitByte(0xe9);
emitInt(offset, 4);
......
......@@ -144,7 +144,8 @@ private:
Value visit_jump(AST_Jump* node);
Value visit_langPrimitive(AST_LangPrimitive* node);
void startJITing(CFGBlock* block, int jump_offset = 0);
// for doc on 'exit_offset' have a look at JitFragmentWriter::num_bytes_exit and num_bytes_overlapping
void startJITing(CFGBlock* block, int exit_offset = 0);
void abortJITing();
void finishJITing(CFGBlock* continue_block = NULL);
......@@ -339,7 +340,7 @@ void RegisterHelper::deregister(void* frame_addr) {
s_interpreterMap.erase(frame_addr);
}
void ASTInterpreter::startJITing(CFGBlock* block, int jump_offset) {
void ASTInterpreter::startJITing(CFGBlock* block, int exit_offset) {
assert(ENABLE_BASELINEJIT);
assert(!jit);
......@@ -351,10 +352,10 @@ void ASTInterpreter::startJITing(CFGBlock* block, int jump_offset) {
if (!code_block || code_block->shouldCreateNewBlock()) {
code_blocks.push_back(std::unique_ptr<JitCodeBlock>(new JitCodeBlock(source_info->getName())));
code_block = code_blocks[code_blocks.size() - 1].get();
jump_offset = 0;
exit_offset = 0;
}
jit = code_block->newFragment(block, jump_offset);
jit = code_block->newFragment(block, exit_offset);
}
void ASTInterpreter::abortJITing() {
......@@ -367,10 +368,10 @@ void ASTInterpreter::abortJITing() {
void ASTInterpreter::finishJITing(CFGBlock* continue_block) {
if (!jit)
return;
int jump_offset = jit->finishCompilation();
int exit_offset = jit->finishCompilation();
jit.reset();
if (continue_block && !continue_block->code)
startJITing(continue_block, jump_offset);
startJITing(continue_block, exit_offset);
}
Value ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at,
......
......@@ -27,13 +27,13 @@
namespace pyston {
static llvm::DenseSet<CFGBlock*> blocks_aborted;
static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
JitCodeBlock::JitCodeBlock(llvm::StringRef name)
: frame_manager(false /* don't omit frame pointers */),
code(new uint8_t[code_size]),
entry_offset(0),
epilog_offset(0),
a(code.get(), code_size - epilog_size),
a(code.get(), code_size),
is_currently_writing(false),
asm_failed(false) {
static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks");
......@@ -53,13 +53,6 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
entry_offset = a.bytesWritten();
// emit epilog
epilog_offset = code_size - epilog_size;
assembler::Assembler endAsm(code.get() + epilog_offset, epilog_size);
endAsm.leave();
endAsm.retq();
RELEASE_ASSERT(!endAsm.hasFailed(), "");
// generate eh frame...
frame_manager.writeAndRegister(code.get(), code_size);
......@@ -82,9 +75,8 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
llvm::CallingConv::C, live_outs, assembler::RAX, 0));
std::unique_ptr<ICSlotRewrite> rewrite(new ICSlotRewrite(ic_info.get(), ""));
return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter(block, std::move(ic_info), std::move(rewrite),
fragment_offset, epilog_offset - fragment_offset,
patch_jump_offset, a.getStartAddr(), *this));
return std::unique_ptr<JitFragmentWriter>(new JitFragmentWriter(
block, std::move(ic_info), std::move(rewrite), fragment_offset, patch_jump_offset, a.getStartAddr(), *this));
}
void JitCodeBlock::fragmentAbort(bool not_enough_space) {
......@@ -102,14 +94,13 @@ void JitCodeBlock::fragmentFinished(int bytes_written, int num_bytes_overlapping
JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info,
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int epilog_offset,
int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block)
std::unique_ptr<ICSlotRewrite> rewrite, int code_offset, int num_bytes_overlapping,
void* entry_code, JitCodeBlock& code_block)
: Rewriter(std::move(rewrite), 0, {}),
block(block),
code_offset(code_offset),
epilog_offset(epilog_offset),
num_bytes_exit(0),
num_bytes_overlapping(num_bytes_overlapping),
num_bytes_forward_jump(0),
entry_code(entry_code),
code_block(code_block),
interp(0),
......@@ -387,7 +378,7 @@ void JitFragmentWriter::emitExec(RewriterVar* code, RewriterVar* globals, Rewrit
void JitFragmentWriter::emitJump(CFGBlock* b) {
RewriterVar* next = imm(b);
addAction([=]() { _emitJump(b, next, num_bytes_forward_jump); }, { next }, ActionType::NORMAL);
addAction([=]() { _emitJump(b, next, num_bytes_exit); }, { next }, ActionType::NORMAL);
}
void JitFragmentWriter::emitOSRPoint(AST_Jump* node) {
......@@ -505,9 +496,31 @@ int JitFragmentWriter::finishCompilation() {
block->code = (void*)((uint64_t)entry_code + code_offset);
block->entry_code = (decltype(block->entry_code))entry_code;
// if any side exits point to this block patch them to a direct jump to this block
auto it = block_patch_locations.find(block);
if (it != block_patch_locations.end()) {
for (void* patch_location : it->second) {
assembler::Assembler patch_asm((uint8_t*)patch_location, min_patch_size);
int64_t offset = (uint64_t)block->code - (uint64_t)patch_location;
if (isLargeConstant(offset)) {
patch_asm.mov(assembler::Immediate(block->code), assembler::R11);
patch_asm.jmpq(assembler::R11);
} else
patch_asm.jmp(assembler::JumpDestination::fromStart(offset));
RELEASE_ASSERT(!patch_asm.hasFailed(), "you may have to increase 'min_patch_size'");
}
block_patch_locations.erase(it);
}
// if we have a side exit, remember its location for patching
if (side_exit_patch_location.first) {
void* patch_location = (uint8_t*)block->code + side_exit_patch_location.second;
block_patch_locations[side_exit_patch_location.first].push_back(patch_location);
}
void* next_fragment_start = (uint8_t*)block->code + assembler->bytesWritten();
code_block.fragmentFinished(assembler->bytesWritten(), num_bytes_overlapping, next_fragment_start);
return num_bytes_forward_jump;
return num_bytes_exit;
}
bool JitFragmentWriter::finishAssembly(int continue_offset) {
......@@ -669,8 +682,8 @@ Box* JitFragmentWriter::unaryopICHelper(UnaryopIC* ic, Box* obj, int op) {
}
void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_indirect_jump) {
size_of_indirect_jump = 0;
void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp) {
size_of_exit_to_interp = 0;
if (b->code) {
int64_t offset = (uint64_t)b->code - ((uint64_t)entry_code + code_offset);
if (isLargeConstant(offset)) {
......@@ -681,11 +694,15 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz
} else {
int num_bytes = assembler->bytesWritten();
block_next->getInReg(assembler::RAX, true);
assembler->mov(assembler::Indirect(assembler::RAX, 8), assembler::RSI);
assembler->test(assembler::RSI, assembler::RSI);
assembler->je(assembler::JumpDestination::fromStart(epilog_offset));
assembler->jmp(assembler::Indirect(assembler::RAX, offsetof(CFGBlock, code)));
size_of_indirect_jump = assembler->bytesWritten() - num_bytes;
assembler->leave();
assembler->retq();
// make sure we have at least 'min_patch_size' of bytes available.
for (int i = assembler->bytesWritten() - num_bytes; i < min_patch_size; ++i)
assembler->trap(); // we could use nops but traps may help if something goes wrong
size_of_exit_to_interp = assembler->bytesWritten() - num_bytes;
assert(assembler->hasFailed() || size_of_exit_to_interp >= min_patch_size);
}
block_next->bumpUse();
}
......@@ -702,7 +719,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var
{
assembler::ForwardJump je(*assembler, assembler::COND_EQUAL);
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->jmp(assembler::JumpDestination::fromStart(epilog_offset));
assembler->leave();
assembler->retq();
}
assertConsistent();
......@@ -711,7 +729,8 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var
void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
return_val->getInReg(assembler::RDX, true);
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->jmp(assembler::JumpDestination::fromStart(epilog_offset));
assembler->leave();
assembler->retq();
return_val->bumpUse();
}
......@@ -731,11 +750,12 @@ void JitFragmentWriter::_emitSideExit(RewriterVar* var, RewriterVar* val_constan
{
assembler::ForwardJump jne(*assembler, assembler::COND_EQUAL);
int bytes = 0;
_emitJump(next_block, next_block_var, bytes);
if (bytes) {
// TODO: We generated an indirect jump.
// If we later on JIT the dest block we could patch this code to a direct jump to the dest.
int exit_size = 0;
_emitJump(next_block, next_block_var, exit_size);
if (exit_size) {
RELEASE_ASSERT(!side_exit_patch_location.first,
"if we start to emit more than one side exit we should make this a vector");
side_exit_patch_location = std::make_pair(next_block, assembler->bytesWritten() - exit_size);
}
}
......
......@@ -74,7 +74,7 @@ class JitFragmentWriter;
// Currently a JitFragment always contains the code of a single CFGBlock*.
// A JitFragment can get called from the Interpreter by calling 'entry_code' which will jump to the fragment start or
// it can get executed by a jump from another fragment.
// At every fragment end we can jump to another fragment, fallback to the Interpreter or exit.
// At every fragment end we can jump to another fragment or exit to the interpreter.
// This means we are not allowed to assume that a register contains a specific value between JitFragments.
// This also means that we are allowed to store a Python variable which only lives in the current CFGBlock* inside a
// register or stack slot, but not if it outlives the block - then we have to store it in the interpreter instance.
......@@ -109,38 +109,34 @@ class JitFragmentWriter;
// movabs $0x1270014108,%rcx ; rcx = True
// cmp %rax,%rcx ; rax == True
// jne end_side_exit
// mov %rax,0x10(%rsp) ;
// movabs $0x215bb60,%rax ; rax = CFGBlock* to call next (rax is also the 1. return reg)
// mov 0x8(%rax),%rsi ; load CFGBlock->code
// test %rsi,%rsi ; CFGBlock->code == 0
// je epilog ; exit to interpreter if code == 0
// jmpq *0x8(%rax) ; jump to new jit fragment (e.g second_JitFragment)
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1st return reg)
// leave
// ret ; exit to the interpreter which will interpret the specified CFGBlock*
// end_side_exit:
// ....
// second_JitFragment:
// ...
// ; this shows what an AST_Return looks like
// mov $0,%rax ; rax contains the next block to interpret.
// in this case 0, which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// leave
// ret
//
// nth_JitFragment:
// ... ; direct jump to a previously JITed block
// jmp first_JitFragment
//
// epilog: ; code which jumps to epilog has to make sure that
// ; rax contains the next block to execute
// ; or 0 if we are finished but then rdx must contain the Box* value to return
// leave
// ret
//
class JitCodeBlock {
private:
static constexpr int scratch_size = 256;
static constexpr int code_size = 4096 * 2;
static constexpr int epilog_size = 2; // size of [leave, ret] in bytes
EHFrameManager frame_manager;
std::unique_ptr<uint8_t[]> code;
int entry_offset;
int epilog_offset;
assembler::Assembler a;
bool is_currently_writing;
bool asm_failed;
......@@ -156,23 +152,35 @@ public:
class JitFragmentWriter : public Rewriter {
private:
static constexpr int min_patch_size = 13;
CFGBlock* block;
int code_offset; // offset inside the JitCodeBlock to the start of this block
int epilog_offset; // offset inside the JitCodeBlock to the epilog
int num_bytes_overlapping; // num of bytes this block overlaps with the prev. used to patch unessary forward jumps
int num_bytes_forward_jump; // number of bytes emited for the last forward jump to the next block. This is used to
// patch unessary forward jumps when the next fragment is emited (it becomes
// num_bytes_overlapping)
void* entry_code; // JitCodeBlock start address. Mmust have an offset of 0 into the code block
int code_offset; // offset inside the JitCodeBlock to the start of this block
// If the next block is not yet JITed, we will set this field to the number of bytes we emitted for the exit to the
// interpreter which continues interpreting the next block.
// If we immediately start JITing the next block, we will set 'num_bytes_overlapping' on the new fragment to this
// value, which will make the fragment start at the instruction where the last block is exiting to the interpreter to
// interpret the new block -> we overwrite the exit with the code of the new block.
// If there is nothing to overwrite, this field will be 0.
int num_bytes_exit;
int num_bytes_overlapping; // num of bytes this block overlaps with the prev. used to patch unessary jumps
void* entry_code; // JitCodeBlock start address. Must have an offset of 0 into the code block
JitCodeBlock& code_block;
RewriterVar* interp;
llvm::DenseMap<InternedString, RewriterVar*> local_syms;
std::unique_ptr<ICInfo> ic_info;
// Optionally points to a CFGBlock and a patch location which should get patched to a direct jump if
// the specified block gets JITed. The patch location is guaranteed to be at least 'min_patch_size' bytes long.
// We can't directly mark the offset for patching because JITing the current fragment may fail. That's why we store
// it in this field and process it only once we know we have successfully generated the code.
std::pair<CFGBlock*, int /* offset from fragment start*/> side_exit_patch_location;
public:
JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic_info, std::unique_ptr<ICSlotRewrite> rewrite,
int code_offset, int epilog_offset, int num_bytes_overlapping, void* entry_code,
JitCodeBlock& code_block);
int code_offset, int num_bytes_overlapping, void* entry_code, JitCodeBlock& code_block);
RewriterVar* imm(uint64_t val);
RewriterVar* imm(void* val);
......@@ -269,7 +277,7 @@ private:
static Box* runtimeCallHelperIC(Box* obj, ArgPassSpec argspec, RuntimeCallIC* ic, Box** args);
#endif
void _emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_indirect_jump);
void _emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp);
void _emitOSRPoint(RewriterVar* result, RewriterVar* node_var);
void _emitReturn(RewriterVar* v);
void _emitSideExit(RewriterVar* var, RewriterVar* val_constant, CFGBlock* next_block, RewriterVar* false_path);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment