Commit f40abdee authored by Kevin Modzelewski

Merge commit 'ff657' into refcounting


Some other merge changes as well
parents 97858a5b ff657c1f
......@@ -108,10 +108,6 @@ static InternedString mangleName(InternedString id, llvm::StringRef private_name
return rtn;
static bool isCompilerCreatedName(llvm::StringRef name) {
return name[0] == '!' || name[0] == '#';
class ModuleScopeInfo : public ScopeInfo {
ScopeInfo* getParent() override { return NULL; }
......@@ -121,7 +117,7 @@ public:
bool passesThroughClosure() override { return false; }
VarScopeType getScopeTypeOfName(InternedString name) override {
if (isCompilerCreatedName(name))
if (name.isCompilerCreatedName())
return VarScopeType::FAST;
return VarScopeType::GLOBAL;
......@@ -185,7 +181,7 @@ public:
bool passesThroughClosure() override { return false; }
VarScopeType getScopeTypeOfName(InternedString name) override {
if (isCompilerCreatedName(name))
if (name.isCompilerCreatedName())
return VarScopeType::FAST;
else if (forced_globals.find(name) != forced_globals.end())
return VarScopeType::GLOBAL;
......@@ -341,7 +337,7 @@ public:
bool passesThroughClosure() override { return usage->passthrough_accesses.size() > 0 && !createsClosure(); }
VarScopeType getScopeTypeOfName(InternedString name) override {
if (isCompilerCreatedName(name))
if (name.isCompilerCreatedName())
return VarScopeType::FAST;
if (usage->forced_globals.count(name) > 0)
......@@ -53,8 +53,6 @@
namespace pyston {
static int calculateNumVRegs(FunctionMetadata* md);
namespace {
class ASTInterpreter;
......@@ -147,7 +145,7 @@ private:
Box** vregs;
ExcInfo last_exception;
BoxedClosure* passed_closure, *created_closure;
BoxedClosure* created_closure;
BoxedGenerator* generator;
unsigned edgecount;
FrameInfo frame_info;
......@@ -162,7 +160,7 @@ public:
~ASTInterpreter() {
int nvregs = calculateNumVRegs(md);
int nvregs = md->calculateNumVRegs();
for (int i = 0; i < nvregs; i++) {
......@@ -189,7 +187,7 @@ public:
FunctionMetadata* getMD() { return md; }
FrameInfo* getFrameInfo() { return &frame_info; }
BoxedClosure* getPassedClosure() { return passed_closure; }
BoxedClosure* getPassedClosure() { return frame_info.passed_closure; }
Box** getVRegs() { return vregs; }
const ScopeInfo* getScopeInfo() { return scope_info; }
......@@ -218,9 +216,9 @@ void ASTInterpreter::setGenerator(Box* gen) {
void ASTInterpreter::setPassedClosure(Box* closure) {
assert(!this->passed_closure); // This should only used for initialization
assert(closure->cls == closure_cls);
this->passed_closure = static_cast<BoxedClosure*>(closure);
assert(!frame_info.passed_closure); // This should only used for initialization
assert(!closure || closure->cls == closure_cls);
frame_info.passed_closure = static_cast<BoxedClosure*>(closure);
void ASTInterpreter::setCreatedClosure(Box* closure) {
......@@ -251,7 +249,6 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
last_exception(NULL, NULL, NULL),
......@@ -261,17 +258,18 @@ ASTInterpreter::ASTInterpreter(FunctionMetadata* md, Box** vregs)
should_jit(false) {
scope_info = source_info->getScopeInfo();
frame_info.vregs = vregs;
void ASTInterpreter::initArguments(BoxedClosure* _closure, BoxedGenerator* _generator, Box* arg1, Box* arg2, Box* arg3,
Box** args) {
passed_closure = _closure;
generator = _generator;
if (scope_info->createsClosure())
created_closure = createClosure(passed_closure, scope_info->getClosureSize());
created_closure = createClosure(_closure, scope_info->getClosureSize());
const ParamNames& param_names = md->param_names;
......@@ -428,17 +426,6 @@ Box* ASTInterpreter::executeInner(ASTInterpreter& interpreter, CFGBlock* start_b
Box* ASTInterpreter::execute(ASTInterpreter& interpreter, CFGBlock* start_block, AST_stmt* start_at) {
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_interpreter");
// Note: due to some (avoidable) restrictions, this check is pretty constrained in where
// it can go, due to the fact that it can throw an exception.
// It can't go in the ASTInterpreter constructor, since that will cause the C++ runtime to
// delete the partially-constructed memory which we don't currently handle. It can't go into
// executeInner since we want the SyntaxErrors to happen *before* the stack frame is entered.
// (For instance, throwing the exception will try to fetch the current statement, but we determine
// that by looking at the cfg.)
if (!interpreter.source_info->cfg)
interpreter.source_info->cfg = computeCFG(interpreter.source_info, interpreter.source_info->body);
return executeInnerAndSetupFrame(interpreter, start_block, start_at);
......@@ -773,8 +760,8 @@ Box* ASTInterpreter::doOSR(AST_Jump* node) {
if (generator)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_GENERATOR_NAME)] = generator;
if (passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = passed_closure;
if (frame_info.passed_closure)
sorted_symbol_table[source_info->getInternedStrings().get(PASSED_CLOSURE_NAME)] = frame_info.passed_closure;
if (created_closure)
sorted_symbol_table[source_info->getInternedStrings().get(CREATED_CLOSURE_NAME)] = created_closure;
......@@ -1106,9 +1093,9 @@ Value ASTInterpreter::createFunction(AST* node, AST_arguments* args, const std::
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, created_closure));
} else {
closure = passed_closure;
closure = frame_info.passed_closure;
if (jit)
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, passed_closure));
closure_var = jit->getInterp()->getAttr(offsetof(ASTInterpreter, frame_info.passed_closure));
......@@ -1177,7 +1164,7 @@ Value ASTInterpreter::visit_makeClass(AST_MakeClass* mkclass) {
BoxedClosure* closure = NULL;
if (scope_info->takesClosure()) {
if (this->scope_info->passesThroughClosure())
closure = passed_closure;
closure = getPassedClosure();
closure = created_closure;
......@@ -1726,8 +1713,8 @@ void ASTInterpreterJitInterface::delNameHelper(void* _interpreter, InternedStrin
Box* ASTInterpreterJitInterface::derefHelper(void* _interpreter, InternedString s) {
ASTInterpreter* interpreter = (ASTInterpreter*)_interpreter;
DerefInfo deref_info = interpreter->scope_info->getDerefInfo(s);
BoxedClosure* closure = interpreter->passed_closure;
BoxedClosure* closure = interpreter->getPassedClosure();
for (int i = 0; i < deref_info.num_parents_from_passed_closure; i++) {
closure = closure->parent;
......@@ -1793,28 +1780,6 @@ extern "C" Box* executeInnerFromASM(ASTInterpreter& interpreter, CFGBlock* start
return ASTInterpreter::executeInner(interpreter, start_block, start_at);
static int calculateNumVRegs(FunctionMetadata* md) {
SourceInfo* source_info = md->source.get();
CFG* cfg = source_info->cfg;
// Note: due to some (avoidable) restrictions, this check is pretty constrained in where
// it can go, due to the fact that it can throw an exception.
// It can't go in the ASTInterpreter constructor, since that will cause the C++ runtime to
// delete the partially-constructed memory which we don't currently handle. It can't go into
// executeInner since we want the SyntaxErrors to happen *before* the stack frame is entered.
// (For instance, throwing the exception will try to fetch the current statement, but we determine
// that by looking at the cfg.)
if (!cfg)
cfg = source_info->cfg = computeCFG(source_info, source_info->body);
if (!cfg->hasVregsAssigned()) {
ScopeInfo* scope_info = md->source->getScopeInfo();
cfg->assignVRegs(md->param_names, scope_info);
return cfg->sym_vreg_map.size();
Box* astInterpretFunction(FunctionMetadata* md, Box* closure, Box* generator, Box* globals, Box* arg1, Box* arg2,
Box* arg3, Box** args) {
UNAVOIDABLE_STAT_TIMER(t0, "us_timer_in_interpreter");
......@@ -1880,8 +1845,18 @@ Box* astInterpretFunction(FunctionMetadata* md, Box* closure, Box* generator, Bo
// Note: due to some (avoidable) restrictions, this check is pretty constrained in where
// it can go, due to the fact that it can throw an exception.
// It can't go in the ASTInterpreter constructor, since that will cause the C++ runtime to
// delete the partially-constructed memory which we don't currently handle. It can't go into
// executeInner since we want the SyntaxErrors to happen *before* the stack frame is entered.
// (For instance, throwing the exception will try to fetch the current statement, but we determine
// that by looking at the cfg.)
if (!source_info->cfg)
source_info->cfg = computeCFG(source_info, source_info->body);
Box** vregs = NULL;
int num_vregs = calculateNumVRegs(md);
int num_vregs = md->calculateNumVRegs();
if (num_vregs > 0) {
vregs = (Box**)alloca(sizeof(Box*) * num_vregs);
memset(vregs, 0, sizeof(Box*) * num_vregs);
......@@ -1911,8 +1886,19 @@ Box* astInterpretFunction(FunctionMetadata* md, Box* closure, Box* generator, Bo
Box* astInterpretFunctionEval(FunctionMetadata* md, Box* globals, Box* boxedLocals) {
// Note: due to some (avoidable) restrictions, this check is pretty constrained in where
// it can go, due to the fact that it can throw an exception.
// It can't go in the ASTInterpreter constructor, since that will cause the C++ runtime to
// delete the partially-constructed memory which we don't currently handle. It can't go into
// executeInner since we want the SyntaxErrors to happen *before* the stack frame is entered.
// (For instance, throwing the exception will try to fetch the current statement, but we determine
// that by looking at the cfg.)
SourceInfo* source_info = md->source.get();
if (!source_info->cfg)
source_info->cfg = computeCFG(source_info, source_info->body);
Box** vregs = NULL;
int num_vregs = calculateNumVRegs(md);
int num_vregs = md->calculateNumVRegs();
if (num_vregs > 0) {
vregs = (Box**)alloca(sizeof(Box*) * num_vregs);
memset(vregs, 0, sizeof(Box*) * num_vregs);
......@@ -1943,7 +1929,7 @@ static Box* astInterpretDeoptInner(FunctionMetadata* md, AST_expr* after_expr, A
SourceInfo* source_info = md->source.get();
Box** vregs = NULL;
int num_vregs = calculateNumVRegs(md);
int num_vregs = md->calculateNumVRegs();
if (num_vregs > 0) {
vregs = (Box**)alloca(sizeof(Box*) * num_vregs);
memset(vregs, 0, sizeof(Box*) * num_vregs);
......@@ -2068,15 +2054,16 @@ FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr) {
return interpreter->getFrameInfo();
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
Box** getVRegsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
BoxedDict* rtn = new BoxedDict();
for (auto& l : interpreter->getSymVRegMap()) {
if (only_user_visible && (l.first.s()[0] == '!' || l.first.s()[0] == '#'))
return interpreter->getVRegs();
Box* val = interpreter->getVRegs()[l.second];
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg) {
BoxedDict* rtn = new BoxedDict();
for (auto& l : cfg->sym_vreg_map_user_visible) {
Box* val = vregs[l.second];
if (val) {
rtn->d[l.first.getBox()] = val;
......@@ -2085,9 +2072,9 @@ BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible) {
return rtn;
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr) {
BoxedDict* localsForInterpretedFrame(void* frame_ptr) {
ASTInterpreter* interpreter = getInterpreterFromFramePtr(frame_ptr);
return interpreter->getPassedClosure();
return localsForInterpretedFrame(interpreter->getVRegs(), interpreter->getMD()->source->cfg);
......@@ -81,9 +81,10 @@ Box* getGlobalsForInterpretedFrame(void* frame_ptr);
FunctionMetadata* getMDForInterpretedFrame(void* frame_ptr);
struct FrameInfo;
FrameInfo* getFrameInfoForInterpretedFrame(void* frame_ptr);
BoxedClosure* passedClosureForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(void* frame_ptr, bool only_user_visible);
Box** getVRegsForInterpretedFrame(void* frame_ptr);
BoxedDict* localsForInterpretedFrame(Box** vregs, CFG* cfg);
BoxedDict* localsForInterpretedFrame(void* frame_ptr);
// Executes the equivalent of CPython's PRINT_EXPR opcode (call sys.displayhook)
extern "C" void printExprHelper(Box* b);
......@@ -935,7 +935,7 @@ void JitFragmentWriter::_emitPPCall(RewriterVar* result, void* func_addr, llvm::
// make space for patchpoint
uint8_t* pp_start = rewrite->getSlotStart() + assembler->bytesWritten();
constexpr int call_size = 16;
constexpr int call_size = 13;
assembler->skipBytes(pp_size + call_size);
uint8_t* pp_end = rewrite->getSlotStart() + assembler->bytesWritten();
assert(assembler->hasFailed() || (pp_start + pp_size + call_size == pp_end));
......@@ -30,6 +30,7 @@
#include "codegen/baseline_jit.h"
#include "codegen/compvars.h"
#include "core/ast.h"
#include "core/cfg.h"
#include "core/util.h"
#include "runtime/code.h"
#include "runtime/types.h"
......@@ -72,6 +73,20 @@ BoxedCode* FunctionMetadata::getCode() {
return code_obj;
int FunctionMetadata::calculateNumVRegs() {
SourceInfo* source_info = source.get();
CFG* cfg = source_info->cfg;
assert(cfg && "We don't calculate the CFG inside this function because it can raise an exception and its "
"therefore not safe to call at every point");
if (!cfg->hasVregsAssigned()) {
ScopeInfo* scope_info = source->getScopeInfo();
cfg->assignVRegs(param_names, scope_info);
return cfg->sym_vreg_map.size();
void FunctionMetadata::addVersion(CompiledFunction* compiled) {
assert((compiled->spec != NULL) + (compiled->entry_descriptor != NULL) == 1);
......@@ -343,6 +343,7 @@ void PystonObjectCache::calculateModuleHash(const llvm::Module* M, EffortLevel e
HashOStream hash_stream;
llvm::WriteBitcodeToFile(M, hash_stream);
hash_stream << (int)effort;
hash_stream << USE_REGALLOC_BASIC;
hash_before_codegen = hash_stream.getHash();
......@@ -104,6 +104,8 @@ public:
virtual void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val,
llvm::Value* exc_val, bool double_check = false) = 0;
virtual llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) = 0;
virtual BORROWED(Box*) getIntConstant(int64_t n) = 0;
virtual BORROWED(Box*) getFloatConstant(double d) = 0;
......@@ -59,6 +59,7 @@ IRGenState::IRGenState(FunctionMetadata* md, CompiledFunction* cf, SourceInfo* s
scratch_size(0) {
assert(!cf->md); // in this case don't need to pass in sourceinfo
......@@ -144,7 +145,7 @@ static llvm::Value* getExcinfoGep(llvm::IRBuilder<true>& builder, llvm::Value* v
return builder.CreateConstInBoundsGEP2_32(v, 0, 0);
static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value* v) {
template <typename Builder> static llvm::Value* getFrameObjGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, exc) == 0, "");
static_assert(sizeof(ExcInfo) == 24, "");
static_assert(sizeof(Box*) == 8, "");
......@@ -154,6 +155,16 @@ static llvm::Value* getFrameObjGep(llvm::IRBuilder<true>& builder, llvm::Value*
// gep->accumulateConstantOffset(>getDataLayout(), ap_offset)
template <typename Builder> static llvm::Value* getPassedClosureGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, passed_closure) == 40, "");
return builder.CreateConstInBoundsGEP2_32(v, 0, 3);
template <typename Builder> static llvm::Value* getVRegsGep(Builder& builder, llvm::Value* v) {
static_assert(offsetof(FrameInfo, vregs) == 48, "");
return builder.CreateConstInBoundsGEP2_32(v, 0, 4);
llvm::Value* IRGenState::getFrameInfoVar() {
There is a matrix of possibilities here.
......@@ -181,10 +192,6 @@ llvm::Value* IRGenState::getFrameInfoVar() {
if (entry_block.begin() != entry_block.end())
builder.SetInsertPoint(&entry_block, entry_block.getFirstInsertionPt());
llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
if (entry_block.getTerminator())
......@@ -195,13 +202,34 @@ llvm::Value* IRGenState::getFrameInfoVar() {
this->frame_info = frame_info_arg;
// use the vregs array from the interpreter
vregs = builder.CreateLoad(getVRegsGep(builder, frame_info_arg));
if (getScopeInfo()->usesNameLookup()) {
// load frame_info.boxedLocals
this->boxed_locals = builder.CreateLoad(getBoxedLocalsGep(builder, this->frame_info));
} else {
// The "normal" case
int num_user_visible_vregs = getMD()->source->cfg->sym_vreg_map_user_visible.size();
if (num_user_visible_vregs > 0) {
auto* vregs_alloca
= builder.CreateAlloca(g.llvm_value_type_ptr, getConstantInt(num_user_visible_vregs), "vregs");
// Clear the vregs array because 0 means an undefined value.
builder.CreateMemSet(vregs_alloca, getConstantInt(0, g.i8),
getConstantInt(num_user_visible_vregs * sizeof(Box*)),
vregs = vregs_alloca;
} else
vregs = getNullPtr(g.llvm_value_type_ptr_ptr);
llvm::AllocaInst* al = builder.CreateAlloca(g.llvm_frame_info_type, NULL, "frame_info");
// frame_info.exc.type = NULL
llvm::Constant* null_value = getNullPtr(g.llvm_value_type_ptr);
getRefcounts()->setType(null_value, RefType::BORROWED);
......@@ -228,6 +256,19 @@ llvm::Value* IRGenState::getFrameInfoVar() {
builder.CreateStore(getRefcounts()->setType(getNullPtr(llvm_frame_obj_type_ptr), RefType::BORROWED),
getFrameObjGep(builder, al));
// frame_info.passed_closure = NULL
auto passed_closure = getNullPtr(g.llvm_closure_type_ptr);
getRefcounts()->setType(passed_closure, RefType::BORROWED);
auto store = builder.CreateStore(passed_closure, getPassedClosureGep(builder, al));
// TODO: reenable these? technically correct, but currently unsupported and the expected
// behavior is to just optimize them away anyway.
// emitter.setNullable(passed_closure, true);
// emitter.refConsumed(passed_closure, store);
// set frame_info.vregs
builder.CreateStore(vregs, getVRegsGep(builder, al));
this->frame_info = al;
......@@ -242,6 +283,15 @@ llvm::Value* IRGenState::getBoxedLocalsVar() {
return this->boxed_locals;
llvm::Value* IRGenState::getVRegsVar() {
if (!vregs) {
// calling this also sets the vregs member
return vregs;
ScopeInfo* IRGenState::getScopeInfo() {
return getSourceInfo()->getScopeInfo();
......@@ -493,6 +543,14 @@ public:
return rtn.getInstruction();
llvm::Value* createDeopt(AST_stmt* current_stmt, AST_expr* node, llvm::Value* node_value) override {
ICSetupInfo* pp = createDeoptIC();
llvm::Value* v
= createIC(pp, (void*)pyston::deopt, { embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value },
UnwindInfo(current_stmt, NULL));
return getBuilder()->CreateIntToPtr(v, g.llvm_value_type_ptr);
void checkAndPropagateCapiException(const UnwindInfo& unw_info, llvm::Value* returned_val, llvm::Value* exc_val,
bool double_check = false) override {
assert(!double_check); // need to call PyErr_Occurred
......@@ -671,8 +729,7 @@ private:
curblock = deopt_bb;
llvm::Value* v = emitter.createCall2(UnwindInfo(current_statement, NULL), g.funcs.deopt,
embedRelocatablePtr(node, g.llvm_astexpr_type_ptr), node_value);
llvm::Value* v = emitter.createDeopt(current_statement, (AST_expr*)node, node_value);
curblock = success_bb;
......@@ -1704,6 +1761,22 @@ private:
return rtn;
// Mirrors a store to `name` into the frame's vregs array when `name` maps to a user-visible vreg.
// `get_llvm_val_cb` is only invoked when a store is actually emitted, i.e. when the vreg index is
// below sym_vreg_map_user_visible.size().
// NOTE(review): `sym_vreg_map[name]` uses operator[]; if this is an llvm::DenseMap (as the map in
// AssignVRegsVisitor is), a name that is missing from the map would be default-inserted with vreg 0,
// which the assert below would not catch -- confirm every name reaching here has a vreg assigned.
template <typename GetLLVMValCB> void _setVRegIfUserVisible(InternedString name, GetLLVMValCB get_llvm_val_cb) {
auto cfg = irstate->getSourceInfo()->cfg;
if (!cfg->hasVregsAssigned())
int vreg = cfg->sym_vreg_map[name];
assert(vreg >= 0);
if (vreg < cfg->sym_vreg_map_user_visible.size()) {
// It looks like this store doesn't have to be volatile, because llvm knows that the vregs are visible
// through the FrameInfo, which escapes.
auto* gep = emitter.getBuilder()->CreateConstInBoundsGEP1_64(irstate->getVRegsVar(), vreg);
emitter.getBuilder()->CreateStore(get_llvm_val_cb(), gep);
// only updates symbol_table if we're *not* setting a global
void _doSet(InternedString name, CompilerVariable* val, const UnwindInfo& unw_info) {
assert(name.s() != "None");
......@@ -1753,6 +1826,9 @@ private:
llvm::Value* gep = getClosureElementGep(emitter, closureValue, offset);
emitter.getBuilder()->CreateStore(val->makeConverted(emitter, UNKNOWN)->getValue(), gep);
auto&& get_llvm_val = [&]() { return val->makeConverted(emitter, UNKNOWN)->getValue(); };
_setVRegIfUserVisible(name, get_llvm_val);
......@@ -1935,6 +2011,8 @@ private:
// SyntaxError: can not delete variable 'x' referenced in nested scope
assert(vst == ScopeInfo::VarScopeType::FAST);
_setVRegIfUserVisible(target->id, []() { return getNullPtr(g.llvm_value_type_ptr); });
if (symbol_table.count(target->id) == 0) {
llvm::CallSite call = emitter.createCall(
unw_info, g.funcs.assertNameDefined,
......@@ -2507,7 +2585,8 @@ public:
pp->addFrameVar("!current_stmt", UNBOXED_INT);
// For deopts we need to add the compiler created names to the stackmap
if (ENABLE_FRAME_INTROSPECTION && pp->isDeopt()) {
// TODO: don't need to use a sorted symbol table if we're explicitly recording the names!
// nice for debugging though.
typedef std::pair<InternedString, CompilerVariable*> Entry;
......@@ -2515,6 +2594,11 @@ public:
std::sort(sorted_symbol_table.begin(), sorted_symbol_table.end(),
[](const Entry& lhs, const Entry& rhs) { return lhs.first < rhs.first; });
for (const auto& p : sorted_symbol_table) {
// We never have to include non compiler generated vars because the user visible variables are stored
// inside the vregs array.
if (!p.first.isCompilerCreatedName())
CompilerVariable* v = p.second;
pp->addFrameVar(p.first.s(), v->getType());
......@@ -2637,6 +2721,11 @@ public:
emitter.setType(passed_closure, RefType::BORROWED);
= new ConcreteCompilerVariable(getPassedClosureType(), AI);
// store the passed_closure inside the frame info so that frame introspection can access it without needing
// a stackmap entry
getPassedClosureGep(*emitter.getBuilder(), irstate->getFrameInfoVar()));
......@@ -75,6 +75,7 @@ private:
llvm::Value* boxed_locals;
llvm::Value* frame_info_arg;
llvm::Value* globals;
llvm::Value* vregs;
int scratch_size;
......@@ -96,6 +97,7 @@ public:
llvm::Value* getScratchSpace(int min_bytes);
llvm::Value* getFrameInfoVar();
llvm::Value* getBoxedLocalsVar();
llvm::Value* getVRegsVar();
ConcreteCompilerType* getReturnType() { return cf->getReturnType(); }
......@@ -398,7 +398,9 @@ void RefcountTracker::addRefcounts(IRGenState* irstate) {
if (!ok_type) {
#ifndef NDEBUG
if (s->getName().startswith("struct.pyston::Box") || (s->getName().startswith("Py") || s->getName().endswith("Object")) || s->getName().startswith("class.pyston::Box")) {
if (s->getName().startswith("struct.pyston::Box")
|| (s->getName().startswith("Py") && s->getName().endswith("Object"))
|| s->getName().startswith("class.pyston::Box")) {
if (s && s->elements().size() >= 2) {
......@@ -34,6 +34,9 @@ void PatchpointInfo::addFrameVar(llvm::StringRef name, CompilerType* type) {
int ICSetupInfo::totalSize() const {
if (isDeopt())
int call_size = CALL_ONLY_SIZE;
if (getCallingConvention() != llvm::CallingConv::C) {
// 14 bytes per reg that needs to be spilled
......@@ -198,7 +201,8 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (spilled)
ASSERT(nspills <= MAX_FRAME_SPILLS, "did %d spills but expected only %d!", nspills, MAX_FRAME_SPILLS);
RELEASE_ASSERT(nspills <= pp->numFrameSpillsSupported(), "did %d spills but expected only %d!", nspills,
assert(scratch_size % sizeof(void*) == 0);
assert(scratch_rbp_offset % sizeof(void*) == 0);
......@@ -216,7 +220,6 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));
if (ic->hasReturnValue()) {
......@@ -351,4 +354,8 @@ ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder) {
return ICSetupInfo::initialize(true, 2, 64, ICSetupInfo::Hasnext, type_recorder);
ICSetupInfo* createDeoptIC() {
return ICSetupInfo::initialize(true, 1, 0, ICSetupInfo::Deopt, NULL);
} // namespace pyston
......@@ -29,16 +29,71 @@ struct CompiledFunction;
class CompilerType;
struct StackMap;
class TypeRecorder;
class ICSetupInfo;
static const int MAX_FRAME_SPILLS = 9; // TODO this shouldn't have to be larger than the set of non-callee-save args (9)
// except that we currently spill the same reg multiple times
static const int CALL_ONLY_SIZE
static const int CALL_ONLY_SIZE = 13 + 1; // 13 for the call, + 1 if we want to nop/trap
static const int DEOPT_CALL_ONLY_SIZE
= 13 + (MAX_FRAME_SPILLS * 9)
+ 1; // 13 for the call, 9 bytes per spill (7 for GP, 9 for XMM), + 1 if we want to nop/trap
void processStackmap(CompiledFunction* cf, StackMap* stackmap);
class ICSetupInfo {
enum ICType {
ICSetupInfo(ICType type, int num_slots, int slot_size, bool has_return_value, TypeRecorder* type_recorder)
: type(type),
type_recorder(type_recorder) {}
const ICType type;
const int num_slots, slot_size;
const bool has_return_value;
TypeRecorder* const type_recorder;
int totalSize() const;
bool hasReturnValue() const { return has_return_value; }
bool isDeopt() const { return type == Deopt; }
llvm::CallingConv::ID getCallingConvention() const {
// FIXME: we currently have some issues with using PreserveAll (the rewriter currently
// does not completely preserve live outs), so disable it temporarily.
#if 0
// The plan is to switch probably everything over to PreserveAll (and potentially AnyReg),
// but for now only switch Getattr so the testing can be localized:
if (type == Getattr || type == Setattr)
return llvm::CallingConv::PreserveAll;
return llvm::CallingConv::C;
static ICSetupInfo* initialize(bool has_return_value, int num_slots, int slot_size, ICType type,
TypeRecorder* type_recorder);
struct PatchpointInfo {
struct FrameVarInfo {
......@@ -73,6 +128,8 @@ public:
int scratchStackmapArg() { return 0; }
int scratchSize() { return 80 + MAX_FRAME_SPILLS * sizeof(void*); }
bool isDeopt() const { return icinfo ? icinfo->isDeopt() : false; }
int numFrameSpillsSupported() const { return isDeopt() ? MAX_FRAME_SPILLS : 0; }
void addFrameVar(llvm::StringRef name, CompilerType* type);
void setNumFrameArgs(int num_frame_args) {
......@@ -100,58 +157,6 @@ public:
static void* getSlowpathAddr(unsigned int pp_id);
class ICSetupInfo {
enum ICType {
ICSetupInfo(ICType type, int num_slots, int slot_size, bool has_return_value, TypeRecorder* type_recorder)
: type(type),
type_recorder(type_recorder) {}
const ICType type;
const int num_slots, slot_size;
const bool has_return_value;
TypeRecorder* const type_recorder;
int totalSize() const;
bool hasReturnValue() const { return has_return_value; }
llvm::CallingConv::ID getCallingConvention() const {
// FIXME: we currently have some issues with using PreserveAll (the rewriter currently
// does not completely preserve live outs), so disable it temporarily.
#if 0
// The plan is to switch probably everything over to PreserveAll (and potentially AnyReg),
// but for now only switch Getattr so the testing can be localized:
if (type == Getattr || type == Setattr)
return llvm::CallingConv::PreserveAll;
return llvm::CallingConv::C;
static ICSetupInfo* initialize(bool has_return_value, int num_slots, int slot_size, ICType type,
TypeRecorder* type_recorder);
class ICInfo;
ICSetupInfo* createGenericIC(TypeRecorder* type_recorder, bool has_return_value, int size);
ICSetupInfo* createCallsiteIC(TypeRecorder* type_recorder, int num_args, ICInfo* bjit_ic_info);
......@@ -165,6 +170,7 @@ ICSetupInfo* createDelitemIC(TypeRecorder* type_recorder);
ICSetupInfo* createBinexpIC(TypeRecorder* type_recorder, ICInfo* bjit_ic_info);
ICSetupInfo* createNonzeroIC(TypeRecorder* type_recorder);
ICSetupInfo* createHasnextIC(TypeRecorder* type_recorder);
ICSetupInfo* createDeoptIC();
} // namespace pyston
......@@ -36,6 +36,7 @@
#include "codegen/irgen/hooks.h"
#include "codegen/irgen/irgenerator.h"
#include "codegen/stackmaps.h"
#include "core/cfg.h"
#include "core/util.h"
#include "runtime/ctxswitching.h"
#include "runtime/objmodel.h"
......@@ -555,9 +556,6 @@ public:
void handleCFrame(unw_cursor_t* cursor) {
unw_word_t ip = get_cursor_ip(cursor);
unw_word_t bp = get_cursor_bp(cursor);
PythonFrameIteratorImpl frame_iter;
bool found_frame = pystack_extractor.handleCFrame(cursor, &frame_iter);
if (found_frame) {
......@@ -829,9 +827,6 @@ PythonFrameIterator::PythonFrameIterator(std::unique_ptr<PythonFrameIteratorImpl
std::swap(this->impl, impl);
// TODO factor getDeoptState and fastLocalsToBoxedLocals
// because they are pretty ugly but have a pretty repetitive pattern.
DeoptState getDeoptState() {
DeoptState rtn;
bool found = false;
......@@ -840,6 +835,7 @@ DeoptState getDeoptState() {
BoxedClosure* closure;
CompiledFunction* cf;
if (frame_iter->getId().type == PythonFrameId::COMPILED) {
assert(0 && "check refcounting");
d = new BoxedDict();
cf = frame_iter->getCF();
......@@ -870,6 +866,23 @@ DeoptState getDeoptState() {
// We could do much better here by memcpying the user visible vregs into the new location which the
// interpreter allocated, instead of storing them one by one in a dict and then retrieving them
// and assigning them to the new vregs array...
// But deopts are so rare it's not really worth it.
Box** vregs = frame_iter->getFrameInfo()->vregs;
for (const auto& p : cf->md->source->cfg->sym_vreg_map_user_visible) {
if (is_undefined.count(p.first.s()))
assert(p.second >= 0 && p.second < cf->md->source->cfg->sym_vreg_map_user_visible.size());
Box* v = vregs[p.second];
if (!v)
d->d[p.first.getBox()] = v;
for (const auto& p : cf->location_map->names) {
if (p.first()[0] == '!')
......@@ -915,10 +928,6 @@ Box* fastLocalsToBoxedLocals() {
Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
BoxedDict* d;
BoxedClosure* closure;
FrameInfo* frame_info;
FunctionMetadata* md = impl->getMD();
ScopeInfo* scope_info = md->source->getScopeInfo();
......@@ -929,90 +938,15 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
return md->source->parent_module->getAttrWrapper();
BoxedDict* d;
FrameInfo* frame_info = impl->getFrameInfo();
BoxedClosure* closure = frame_info->passed_closure;
if (impl->getId().type == PythonFrameId::COMPILED) {
CompiledFunction* cf = impl->getCF();
d = new BoxedDict();
uint64_t ip = impl->getId().ip;
assert(ip > cf->code_start);
unsigned offset = ip - cf->code_start;
// We have to detect + ignore any entries for variables that
// could have been defined (so they have entries) but aren't (so the
// entries point to uninitialized memory).
std::unordered_set<std::string> is_undefined;
for (const auto& p : cf->location_map->names) {
if (!startswith(p.first(), "!is_defined_"))
auto e = p.second.findEntry(offset);
if (e) {
const auto& locs = e->locations;
assert(locs.size() == 1);
uint64_t v = impl->readLocation(locs[0]);
if ((v & 1) == 0)
for (const auto& p : cf->location_map->names) {
if (p.first()[0] == '!')
if (p.first()[0] == '#')
if (is_undefined.count(p.first()))
auto e = p.second.findEntry(offset);
if (e) {
const auto& locs = e->locations;
llvm::SmallVector<uint64_t, 1> vals;
// printf("%s: %s\n", p.first().c_str(), e.type->debugName().c_str());
// printf("%ld locs\n", locs.size());
for (auto& loc : locs) {
auto v = impl->readLocation(loc);
// printf("%d %d %d: 0x%lx\n", loc.type, loc.regnum, loc.offset, v);
// dump((void*)v);
Box* v = e->type->deserializeFromFrame(vals);
// printf("%s: (pp id %ld) %p\n", p.first().c_str(), e._debug_pp_id, v);
d->d[boxString(p.first())] = v;
closure = NULL;
if (cf->location_map->names.count(PASSED_CLOSURE_NAME) > 0) {
auto e = cf->location_map->names[PASSED_CLOSURE_NAME].findEntry(offset);
if (e) {
const auto& locs = e->locations;
llvm::SmallVector<uint64_t, 1> vals;
for (auto& loc : locs) {
Box* v = e->type->deserializeFromFrame(vals);
closure = static_cast<BoxedClosure*>(v);
frame_info = impl->getFrameInfo();
assert(impl->getId().ip > cf->code_start);
d = localsForInterpretedFrame(frame_info->vregs, cf->md->source->cfg);
} else if (impl->getId().type == PythonFrameId::INTERPRETED) {
d = localsForInterpretedFrame((void*)impl->getId().bp, true);
closure = passedClosureForInterpretedFrame((void*)impl->getId().bp);
frame_info = getFrameInfoForInterpretedFrame((void*)impl->getId().bp);
d = localsForInterpretedFrame((void*)impl->getId().bp);
} else {
......@@ -1046,10 +980,14 @@ Box* PythonFrameIterator::fastLocalsToBoxedLocals() {
// TODO Right now d just has all the python variables that are *initialized*
// But we also need to loop through all the uninitialized variables that we have
// access to and delete them from the locals dict
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
// NOTE(review): the condition below compares the boxedLocals object itself against dict_cls
// rather than the object's class, so the bulk-insert branch looks unreachable for an ordinary
// dict instance -- presumably a class check was intended; confirm.
// NOTE(review): before enabling that branch, verify that d.insert() replaces the value of a key
// that is already present the way setitem() does -- TODO confirm.
if (frame_info->boxedLocals == dict_cls) {
((BoxedDict*)frame_info->boxedLocals)->d.insert(d->d.begin(), d->d.end());
} else {
// Generic path: store every (name, value) pair from d through setitem().
for (const auto& p : *d) {
Box* varname = p.first;
Box* value = p.second;
setitem(frame_info->boxedLocals, varname, value);
return frame_info->boxedLocals;
......@@ -2528,10 +2528,11 @@ void CFG::print(llvm::raw_ostream& stream) {
// AST visitor constructed by CFG::assignVRegs() to assign vregs to names.
class AssignVRegsVisitor : public NoopASTVisitor {
int index = 0;
// When true, names for which isCompilerCreatedName() holds are skipped by the visitor.
bool only_user_visible;
// Name -> vreg mapping; CFG::assignVRegs() copies/moves it into the CFG's own maps.
llvm::DenseMap<InternedString, int> sym_vreg_map;
// Used to resolve a name's scope type when its lookup_type is still UNKNOWN.
ScopeInfo* scope_info;
AssignVRegsVisitor(ScopeInfo* scope_info) : scope_info(scope_info) {}
AssignVRegsVisitor(ScopeInfo* scope_info, bool only_user_visible) : only_user_visible(only_user_visible), scope_info(scope_info) {}
bool visit_arguments(AST_arguments* node) override {
for (AST_expr* d : node->defaults)
......@@ -2563,6 +2564,9 @@ public:
if (node->vreg != -1)
return true;
if (only_user_visible && node->id.isCompilerCreatedName())
return true;
if (node->lookup_type == ScopeInfo::VarScopeType::UNKNOWN)
node->lookup_type = scope_info->getScopeTypeOfName(node->id);
......@@ -2585,23 +2589,31 @@ void CFG::assignVRegs(const ParamNames& param_names, ScopeInfo* scope_info) {
if (has_vregs_assigned)
AssignVRegsVisitor visitor(scope_info);
for (CFGBlock* b : blocks) {
for (AST_stmt* stmt : b->body) {
AssignVRegsVisitor visitor(scope_info, true);
// We need to do two passes: first we assign vregs to the user-visible variables, and then the compiler-created ones get theirs.
for (int i=0; i<2; ++i) {
for (CFGBlock* b : blocks) {
for (AST_stmt* stmt : b->body) {
for (auto* name : param_names.arg_names) {
for (auto* name : param_names.arg_names) {
if (param_names.vararg_name)
if (param_names.vararg_name)
if (param_names.kwarg_name)
if (param_names.kwarg_name)
if (visitor.only_user_visible) {
visitor.only_user_visible = false;
sym_vreg_map_user_visible = visitor.sym_vreg_map;
sym_vreg_map = std::move(visitor.sym_vreg_map);
has_vregs_assigned = true;
......@@ -81,7 +81,10 @@ private:
std::vector<CFGBlock*> blocks;
// Contains the vreg assignment for every name including the user visible ones
// (which will have lower ids than the compiler generated ones).
llvm::DenseMap<InternedString, int> sym_vreg_map;
// Copy of the visitor's map taken in assignVRegs() while the visitor was still in
// only_user_visible mode, i.e. at the point where compiler-created names were being skipped.
llvm::DenseMap<InternedString, int> sym_vreg_map_user_visible;
CFG() : next_idx(0), has_vregs_assigned(false) {}
......@@ -37,4 +37,10 @@ llvm::StringRef InternedString::s() const {
const char* InternedString::c_str() const {
return _str->c_str();
// Returns true when the first character of this interned string is '!' or '#'.
// NOTE(review): reads index 0 without checking for an empty string -- presumably interned
// names are never empty; confirm.
bool InternedString::isCompilerCreatedName() const {
char c = _str->s()[0];
return c == '!' || c == '#';
......@@ -83,6 +83,8 @@ public:
operator llvm::StringRef() const { return s(); }
operator BoxedString*() const { return getBox(); }
bool isCompilerCreatedName() const;
friend class InternedStringPool;
friend struct std::hash<InternedString>;
friend struct std::less<InternedString>;
......@@ -494,6 +494,8 @@ public:
void addVersion(void* f, ConcreteCompilerType* rtn_type, const std::vector<ConcreteCompilerType*>& arg_types,
ExceptionStyle exception_style = CXX);
int calculateNumVRegs();
// Helper function, meant for the C++ runtime, which allocates a FunctionMetadata object and calls addVersion
// once to it.
static FunctionMetadata* create(void* f, ConcreteCompilerType* rtn_type, int nargs, bool takes_varargs,
......@@ -935,8 +937,11 @@ struct FrameInfo {
Box* boxedLocals;
BoxedFrame* frame_obj;
BoxedClosure* passed_closure;
Box** vregs;
FrameInfo(ExcInfo exc) : exc(exc), boxedLocals(NULL), frame_obj(0) {}
FrameInfo(ExcInfo exc) : exc(exc), boxedLocals(NULL), frame_obj(0), passed_closure(0), vregs(0) {}
// callattr() takes a number of flags and arguments, and for performance we pack them into a single register:
Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment