Commit 16eed354 authored by Marius Wachtler's avatar Marius Wachtler

interpreter: Assign fixed slots (vregs) to symbols with fast or closure scope.

This removes a bottleneck of the interpreter/bjit:
previously most variable accesses required a DenseMap lookup; with this change each variable is accessed at a fixed offset.
The bjit keeps the pointer to the vregs array in r14 for fast access.
parent 1c1dcdb9
......@@ -838,7 +838,6 @@ void Assembler::test(Register reg1, Register reg2) {
reg1_idx -= 8;
}
if (reg2_idx >= 8) {
trap();
rex |= REX_B;
reg2_idx -= 8;
}
......
This diff is collapsed.
......@@ -43,12 +43,10 @@ struct ASTInterpreterJitInterface {
static Box* derefHelper(void* interp, InternedString s);
static Box* doOSRHelper(void* interp, AST_Jump* node);
static Box* getLocalHelper(void* interp, InternedString id);
static Box* landingpadHelper(void* interp);
static Box* setExcInfoHelper(void* interp, Box* type, Box* value, Box* traceback);
static Box* uncacheExcInfoHelper(void* interp);
static void setLocalClosureHelper(void* interp, InternedString id, Box* v);
static void setLocalHelper(void* interp, InternedString id, Box* v);
static void setLocalClosureHelper(void* interp, long vreg, InternedString id, Box* v);
};
class RewriterVar;
......
......@@ -36,17 +36,18 @@ static llvm::DenseMap<CFGBlock*, std::vector<void*>> block_patch_locations;
//
// long foo(char* c);
// void bjit() {
// asm volatile ("" ::: "r14");
// asm volatile ("" ::: "r12");
// char scratch[256+16];
// foo(scratch);
// }
//
// It omits the frame pointer but saves R12
// It omits the frame pointer but saves R12 and R14
const unsigned char eh_info[]
= { 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x7a, 0x52, 0x00, 0x01, 0x78, 0x10,
0x01, 0x1b, 0x0c, 0x07, 0x08, 0x90, 0x01, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1c, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x47,
0x0e, 0xa0, 0x02, 0x8c, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 };
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x42, 0x0e, 0x10, 0x42,
0x0e, 0x18, 0x47, 0x0e, 0xb0, 0x02, 0x8c, 0x03, 0x8e, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00 };
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
......@@ -63,10 +64,12 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
num_jit_total_bytes.log(code_size);
// emit prolog
a.push(assembler::R14);
a.push(assembler::R12);
static_assert(sp_adjustment % 16 == 0, "stack isn't aligned");
static_assert(sp_adjustment % 16 == 8, "stack isn't aligned");
a.sub(assembler::Immediate(sp_adjustment), assembler::RSP);
a.mov(assembler::RDI, assembler::R12); // interpreter pointer
a.mov(assembler::RDX, assembler::R14); // vreg array
a.jmp(assembler::Indirect(assembler::RSI, offsetof(CFGBlock, code))); // jump to block
entry_offset = a.bytesWritten();
......@@ -139,6 +142,10 @@ JitFragmentWriter::JitFragmentWriter(CFGBlock* block, std::unique_ptr<ICInfo> ic
interp = createNewVar();
addLocationToVar(interp, assembler::R12);
interp->setAttr(ASTInterpreterJitInterface::getCurrentBlockOffset(), imm(block));
vregs_array = createNewVar();
addLocationToVar(vregs_array, assembler::R14);
addAction([=]() { vregs_array->bumpUse(); }, vregs_array, ActionType::NORMAL);
}
RewriterVar* JitFragmentWriter::imm(uint64_t val) {
......@@ -279,10 +286,10 @@ RewriterVar* JitFragmentWriter::emitGetAttr(RewriterVar* obj, BoxedString* s, AS
return emitPPCall((void*)getattr, { obj, imm(s) }, 2, 512, getTypeRecorderForNode(node));
}
RewriterVar* JitFragmentWriter::emitGetBlockLocal(InternedString s) {
RewriterVar* JitFragmentWriter::emitGetBlockLocal(InternedString s, int vreg) {
auto it = local_syms.find(s);
if (it == local_syms.end())
return emitGetLocal(s);
return emitGetLocal(s, vreg);
return it->second;
}
......@@ -308,13 +315,11 @@ RewriterVar* JitFragmentWriter::emitGetItem(RewriterVar* value, RewriterVar* sli
return emitPPCall((void*)getitem, { value, slice }, 2, 512);
}
RewriterVar* JitFragmentWriter::emitGetLocal(InternedString s) {
return call(false, (void*)ASTInterpreterJitInterface::getLocalHelper, getInterp(),
#ifndef NDEBUG
imm(asUInt(s).first), imm(asUInt(s).second));
#else
imm(asUInt(s)));
#endif
RewriterVar* JitFragmentWriter::emitGetLocal(InternedString s, int vreg) {
assert(vreg >= 0);
RewriterVar* val_var = vregs_array->getAttr(vreg * 8);
addAction([=]() { _emitGetLocal(val_var, s.c_str()); }, { val_var }, ActionType::NORMAL);
return val_var;
}
RewriterVar* JitFragmentWriter::emitGetPystonIter(RewriterVar* v) {
......@@ -471,17 +476,19 @@ void JitFragmentWriter::emitSetItemName(BoxedString* s, RewriterVar* v) {
emitSetItem(emitGetBoxedLocals(), imm(s), v);
}
void JitFragmentWriter::emitSetLocal(InternedString s, bool set_closure, RewriterVar* v) {
void* func = set_closure ? (void*)ASTInterpreterJitInterface::setLocalClosureHelper
: (void*)ASTInterpreterJitInterface::setLocalHelper;
call(false, func, getInterp(),
void JitFragmentWriter::emitSetLocal(InternedString s, int vreg, bool set_closure, RewriterVar* v) {
assert(vreg >= 0);
if (set_closure) {
call(false, (void*)ASTInterpreterJitInterface::setLocalClosureHelper, getInterp(), imm(vreg),
#ifndef NDEBUG
imm(asUInt(s).first), imm(asUInt(s).second),
imm(asUInt(s).first), imm(asUInt(s).second),
#else
imm(asUInt(s)),
imm(asUInt(s)),
#endif
v);
v);
} else {
vregs_array->setAttr(8 * vreg, v);
}
}
void JitFragmentWriter::emitSideExit(RewriterVar* v, Box* cmp_value, CFGBlock* next_block) {
......@@ -619,6 +626,10 @@ RewriterVar* JitFragmentWriter::emitPPCall(void* func_addr, llvm::ArrayRef<Rewri
#endif
}
// Helper called from JIT-emitted code (see _emitGetLocal) when a local loaded from the vregs array is zero.
// Forwards to assertNameDefined with 0 as first argument, UnboundLocalError and true; presumably this always
// raises an UnboundLocalError naming `id` - confirm against assertNameDefined.
void JitFragmentWriter::assertNameDefinedHelper(const char* id) {
assertNameDefined(0, id, UnboundLocalError, true);
}
Box* JitFragmentWriter::callattrHelper(Box* obj, BoxedString* attr, CallattrFlags flags, TypeRecorder* type_recorder,
Box** args, std::vector<BoxedString*>* keyword_names) {
auto arg_tuple = getTupleFromArgsArray(&args[0], flags.argspec.totalPassed());
......@@ -683,6 +694,18 @@ Box* JitFragmentWriter::runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRec
return recordType(type_recorder, r);
}
// Emits the "is this local defined?" check for a value that emitGetLocal loaded from the vregs array.
//   val_var: the loaded value
//   name:    variable name passed to assertNameDefinedHelper when the loaded value is zero. Its address is
//            embedded as an immediate, so the string presumably has to stay alive as long as the emitted
//            code can run - verify against the caller.
void JitFragmentWriter::_emitGetLocal(RewriterVar* val_var, const char* name) {
assembler::Register var_reg = val_var->getInReg();
// test reg,reg: the not-zero condition used below holds exactly when the loaded value is non-zero.
assembler->test(var_reg, var_reg);
val_var->bumpUse();
{
// Conditional forward jump over the error path; presumably its target is fixed up when `jnz` goes out of
// scope at the closing brace - confirm against assembler::ForwardJump.
assembler::ForwardJump jnz(*assembler, assembler::COND_NOT_ZERO);
// Error path: pass `name` in RDI (first argument register) and call the helper indirectly through R11.
assembler->mov(assembler::Immediate((uint64_t)name), assembler::RDI);
assembler->mov(assembler::Immediate((void*)assertNameDefinedHelper), assembler::R11);
assembler->callq(assembler::R11);
}
}
void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp) {
size_of_exit_to_interp = 0;
......@@ -698,6 +721,7 @@ void JitFragmentWriter::_emitJump(CFGBlock* b, RewriterVar* block_next, int& siz
block_next->getInReg(assembler::RAX, true);
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R14);
assembler->retq();
// make sure we have at least 'min_patch_size' of bytes available.
......@@ -724,6 +748,7 @@ void JitFragmentWriter::_emitOSRPoint(RewriterVar* result, RewriterVar* node_var
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R14);
assembler->retq();
}
......@@ -794,6 +819,7 @@ void JitFragmentWriter::_emitReturn(RewriterVar* return_val) {
assembler->mov(assembler::Immediate(0ul), assembler::RAX); // TODO: use xor
assembler->add(assembler::Immediate(JitCodeBlock::sp_adjustment), assembler::RSP);
assembler->pop(assembler::R12);
assembler->pop(assembler::R14);
assembler->retq();
return_val->bumpUse();
}
......
......@@ -70,12 +70,16 @@ class JitFragmentWriter;
// This also means that we are allowed to store a Python variable which only lives in the current CFGBlock* inside a
// register or stack slot, but not if it outlives the block - then we have to store it in the interpreter instance.
//
// We use the following callee-save regs to speed up the generated code:
// r12: pointer to ASTInterpreter instance
// r14: pointer to the vregs array
//
// To execute a specific CFGBlock one has to call:
// CFGBlock* block;
// block->entry_code(ast_interpreter_instance, block)
// block->entry_code(ast_interpreter_instance, block, ast_interpreter_instance->vregs)
//
// Signature of a JitCodeBlock:
// std::pair<CFGBlock*, Box*>(*entry_code)(ASTInterpreter* interp, CFGBlock* block)
// std::pair<CFGBlock*, Box*>(*entry_code)(ASTInterpreter* interp, CFGBlock* block, Box** vregs)
// args:
// interp: instance to the ASTInterpreter
// block: block to execute
......@@ -87,10 +91,12 @@ class JitFragmentWriter;
//
// Basic layout of generated code block is:
// entry_code:
// push %r14 ; save r14
// push %r12 ; save r12
// sub $0x110,%rsp ; setup scratch, 0x110 = scratch_size + 16 = space for two func args passed on the
// stack
// sub $0x118,%rsp ; setup scratch, 0x118 = scratch_size + 16 (space for two func args passed on the
// stack) + 8 bytes for stack alignment
// mov %rdi,%r12 ; copy the pointer to ASTInterpreter instance into r12
// mov %rdx,%r14 ; copy the pointer to the vregs array into r14
// jmpq *0x8(%rsi) ; jump to block->code
// possible values: first_JitFragment, second_JitFragment,...
//
......@@ -101,8 +107,9 @@ class JitFragmentWriter;
// cmp %rax,%rcx ; rax == True
// jne end_side_exit
// movabs $0x215bb60,%rax ; rax = CFGBlock* to interpret next (rax is the 1. return reg)
// add $0x110,%rsp ; restore stack pointer
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r14 ; restore r14
// ret ; exit to the interpreter which will interpret the specified CFGBLock*
// end_side_exit:
// ....
......@@ -113,8 +120,9 @@ class JitFragmentWriter;
// mov $0,%rax ; rax contains the next block to interpret.
// in this case 0 which means we are finished
// movabs $0x1270014108,%rdx ; rdx must contain the Box* value to return
// add $0x110,%rsp ; restore stack pointer
// add $0x118,%rsp ; restore stack pointer
// pop %r12 ; restore r12
// pop %r14 ; restore r14
// ret
//
// nth_JitFragment:
......@@ -130,7 +138,7 @@ public:
// scratch size + space for passing additional args on the stack without having to adjust the SP when calling
// functions with more than 6 args.
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8;
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */;
private:
std::unique_ptr<uint8_t[]> code;
......@@ -168,6 +176,7 @@ private:
void* entry_code; // JitCodeBlock start address. Must have an offset of 0 into the code block
JitCodeBlock& code_block;
RewriterVar* interp;
RewriterVar* vregs_array;
llvm::DenseMap<InternedString, RewriterVar*> local_syms;
std::unique_ptr<ICInfo> ic_info;
......@@ -208,13 +217,13 @@ public:
RewriterVar* emitDeref(InternedString s);
RewriterVar* emitExceptionMatches(RewriterVar* v, RewriterVar* cls);
RewriterVar* emitGetAttr(RewriterVar* obj, BoxedString* s, AST_expr* node);
RewriterVar* emitGetBlockLocal(InternedString s);
RewriterVar* emitGetBlockLocal(InternedString s, int vreg);
RewriterVar* emitGetBoxedLocal(BoxedString* s);
RewriterVar* emitGetBoxedLocals();
RewriterVar* emitGetClsAttr(RewriterVar* obj, BoxedString* s);
RewriterVar* emitGetGlobal(Box* global, BoxedString* s);
RewriterVar* emitGetItem(RewriterVar* value, RewriterVar* slice);
RewriterVar* emitGetLocal(InternedString s);
RewriterVar* emitGetLocal(InternedString s, int vreg);
RewriterVar* emitGetPystonIter(RewriterVar* v);
RewriterVar* emitHasnext(RewriterVar* v);
RewriterVar* emitLandingpad();
......@@ -241,7 +250,7 @@ public:
void emitSetGlobal(Box* global, BoxedString* s, RewriterVar* v);
void emitSetItemName(BoxedString* s, RewriterVar* v);
void emitSetItem(RewriterVar* target, RewriterVar* slice, RewriterVar* value);
void emitSetLocal(InternedString s, bool set_closure, RewriterVar* v);
void emitSetLocal(InternedString s, int vreg, bool set_closure, RewriterVar* v);
void emitSideExit(RewriterVar* v, Box* cmp_value, CFGBlock* next_block);
void emitUncacheExcInfo();
......@@ -262,6 +271,7 @@ private:
RewriterVar* emitPPCall(void* func_addr, llvm::ArrayRef<RewriterVar*> args, int num_slots, int slot_size,
TypeRecorder* type_recorder = NULL);
static void assertNameDefinedHelper(const char* id);
static Box* callattrHelper(Box* obj, BoxedString* attr, CallattrFlags flags, TypeRecorder* type_recorder,
Box** args, std::vector<BoxedString*>* keyword_names);
static Box* createDictHelper(uint64_t num, Box** keys, Box** values);
......@@ -275,6 +285,7 @@ private:
static Box* runtimeCallHelper(Box* obj, ArgPassSpec argspec, TypeRecorder* type_recorder, Box** args,
std::vector<BoxedString*>* keyword_names);
void _emitGetLocal(RewriterVar* val_var, const char* name);
void _emitJump(CFGBlock* b, RewriterVar* block_next, int& size_of_exit_to_interp);
void _emitOSRPoint(RewriterVar* result, RewriterVar* node_var);
void _emitPPCall(RewriterVar* result, void* func_addr, const RewriterVar::SmallVector& args, int num_slots,
......
......@@ -703,6 +703,10 @@ public:
// different bytecodes.
ScopeInfo::VarScopeType lookup_type;
// The interpreter and baseline JIT store variables with FAST and CLOSURE scopes in an array (vregs). This specifies
// the zero-based index of this variable inside the vregs array. If uninitialized, its value is -1.
int vreg;
virtual void accept(ASTVisitor* v);
virtual void* accept_expr(ExprVisitor* v);
......@@ -710,7 +714,8 @@ public:
: AST_expr(AST_TYPE::Name, lineno, col_offset),
ctx_type(ctx_type),
id(id),
lookup_type(ScopeInfo::VarScopeType::UNKNOWN) {}
lookup_type(ScopeInfo::VarScopeType::UNKNOWN),
vreg(-1) {}
static const AST_TYPE::AST_TYPE TYPE = AST_TYPE::Name;
};
......
......@@ -2471,6 +2471,87 @@ void CFG::print() {
blocks[i]->print();
}
class AssignVRegsVisitor : public NoopASTVisitor {
public:
int index = 0;
llvm::DenseMap<InternedString, int> sym_vreg_map;
ScopeInfo* scope_info;
AssignVRegsVisitor(ScopeInfo* scope_info) : scope_info(scope_info) {}
bool visit_arguments(AST_arguments* node) override {
for (AST_expr* d : node->defaults)
d->accept(this);
return true;
}
bool visit_classdef(AST_ClassDef* node) override {
for (auto e : node->bases)
e->accept(this);
for (auto e : node->decorator_list)
e->accept(this);
return true;
}
bool visit_functiondef(AST_FunctionDef* node) override {
for (auto* d : node->decorator_list)
d->accept(this);
node->args->accept(this);
return true;
}
bool visit_lambda(AST_Lambda* node) override {
node->args->accept(this);
return true;
}
bool visit_name(AST_Name* node) override {
if (node->vreg != -1)
return true;
if (node->lookup_type == ScopeInfo::VarScopeType::UNKNOWN)
node->lookup_type = scope_info->getScopeTypeOfName(node->id);
if (node->lookup_type == ScopeInfo::VarScopeType::FAST || node->lookup_type == ScopeInfo::VarScopeType::CLOSURE)
node->vreg = assignVReg(node->id);
return true;
}
int assignVReg(InternedString id) {
auto it = sym_vreg_map.find(id);
if (sym_vreg_map.end() == it) {
sym_vreg_map[id] = index;
return index++;
}
return it->second;
}
};
void CFG::assignVRegs(const ParamNames& param_names, ScopeInfo* scope_info) {
if (has_vregs_assigned)
return;
AssignVRegsVisitor visitor(scope_info);
for (CFGBlock* b : blocks) {
for (AST_stmt* stmt : b->body) {
stmt->accept(&visitor);
}
}
for (auto* name : param_names.arg_names) {
name->accept(&visitor);
}
if (param_names.vararg_name)
param_names.vararg_name->accept(&visitor);
if (param_names.kwarg_name)
param_names.kwarg_name->accept(&visitor);
sym_vreg_map = std::move(visitor.sym_vreg_map);
has_vregs_assigned = true;
}
CFG* computeCFG(SourceInfo* source, std::vector<AST_stmt*> body) {
STAT_TIMER(t0, "us_timer_computecfg", 0);
......
......@@ -39,6 +39,9 @@ class AST_stmt;
class Box;
class CFG;
class ParamNames;
class ScopeInfo;
class CFGBlock {
private:
CFG* cfg;
......@@ -48,7 +51,7 @@ public:
// contains address to the start of the code of this basic block
void* code;
// contains the address of the entry function
std::pair<CFGBlock*, Box*>(*entry_code)(void* interpeter, CFGBlock* block);
std::pair<CFGBlock*, Box*>(*entry_code)(void* interpeter, CFGBlock* block, Box** vregs);
std::vector<AST_stmt*> body;
std::vector<CFGBlock*> predecessors, successors;
......@@ -70,11 +73,14 @@ public:
class CFG {
private:
int next_idx;
bool has_vregs_assigned;
public:
std::vector<CFGBlock*> blocks;
CFG() : next_idx(0) {}
llvm::DenseMap<InternedString, int> sym_vreg_map;
CFG() : next_idx(0), has_vregs_assigned(false) {}
CFGBlock* getStartingBlock() { return blocks[0]; }
......@@ -103,6 +109,8 @@ public:
}
void print();
void assignVRegs(const ParamNames& param_names, ScopeInfo* scope_info);
};
class SourceInfo;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment