Commit 9407f64a authored by Kevin Modzelewski's avatar Kevin Modzelewski

Store live_outs as a bitset

We were storing and passing them as std::unordered_map (and sometimes
switching to std::vector).  But the set can only contain the integers
0 through 15, so just represent it as a bitset.
parent 896bc696
......@@ -183,14 +183,14 @@ ICSlotInfo* ICInfo::pickEntryForRewrite(const char* debug_name) {
}
ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet _live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder)
: next_slot_to_try(0),
stack_info(stack_info),
num_slots(num_slots),
slot_size(slot_size),
calling_conv(calling_conv),
live_outs(live_outs.begin(), live_outs.end()),
live_outs(std::move(_live_outs)),
return_register(return_register),
type_recorder(type_recorder),
retry_in(0),
......@@ -200,15 +200,14 @@ ICInfo::ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, S
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr) {
for (int i = 0; i < num_slots; i++) {
slots.push_back(ICSlotInfo(this, i));
slots.emplace_back(this, i);
}
}
static llvm::DenseMap<void*, ICInfo*> ics_by_return_addr;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo* ic, StackInfo stack_info,
std::unordered_set<int> live_outs) {
const ICSetupInfo* ic, StackInfo stack_info, LiveOutSet live_outs) {
assert(slowpath_start_addr - start_addr >= ic->num_slots * ic->slot_size);
assert(slowpath_rtn_addr > slowpath_start_addr);
assert(slowpath_rtn_addr <= start_addr + ic->totalSize());
......@@ -221,9 +220,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);
// TODO we only need to do this if 0 was in live_outs, since if it wasn't, that indicates
// the return value won't be used and we can optimize based on that.
......@@ -247,7 +244,7 @@ std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t*
}
ICInfo* icinfo = new ICInfo(start_addr, slowpath_rtn_addr, continue_addr, stack_info, ic->num_slots, ic->slot_size,
ic->getCallingConvention(), live_outs, return_register, ic->type_recorder);
ic->getCallingConvention(), std::move(live_outs), return_register, ic->type_recorder);
ics_by_return_addr[slowpath_rtn_addr] = icinfo;
......
......@@ -23,6 +23,7 @@
#include "asm_writing/assembler.h"
#include "asm_writing/types.h"
#include "core/util.h"
namespace pyston {
......@@ -91,6 +92,8 @@ public:
friend class ICInfo;
};
typedef BitSet<16> LiveOutSet;
class ICInfo {
private:
std::vector<ICSlotInfo> slots;
......@@ -105,7 +108,7 @@ private:
const int num_slots;
const int slot_size;
const llvm::CallingConv::ID calling_conv;
const std::vector<int> live_outs;
LiveOutSet live_outs;
const assembler::GenericRegister return_register;
TypeRecorder* const type_recorder;
int retry_in, retry_backoff;
......@@ -116,14 +119,14 @@ private:
public:
ICInfo(void* start_addr, void* slowpath_rtn_addr, void* continue_addr, StackInfo stack_info, int num_slots,
int slot_size, llvm::CallingConv::ID calling_conv, const std::unordered_set<int>& live_outs,
int slot_size, llvm::CallingConv::ID calling_conv, LiveOutSet live_outs,
assembler::GenericRegister return_register, TypeRecorder* type_recorder);
void* const start_addr, *const slowpath_rtn_addr, *const continue_addr;
int getSlotSize() { return slot_size; }
int getNumSlots() { return num_slots; }
llvm::CallingConv::ID getCallingConvention() { return calling_conv; }
const std::vector<int>& getLiveOuts() { return live_outs; }
const LiveOutSet& getLiveOuts() { return live_outs; }
std::unique_ptr<ICSlotRewrite> startRewrite(const char* debug_name);
void clear(ICSlotInfo* entry);
......@@ -138,8 +141,7 @@ class ICSetupInfo;
struct CompiledFunction;
std::unique_ptr<ICInfo> registerCompiledPatchpoint(uint8_t* start_addr, uint8_t* slowpath_start_addr,
uint8_t* continue_addr, uint8_t* slowpath_rtn_addr,
const ICSetupInfo*, StackInfo stack_info,
std::unordered_set<int> live_outs);
const ICSetupInfo*, StackInfo stack_info, LiveOutSet live_outs);
void deregisterCompiledPatchpoint(ICInfo* ic);
ICInfo* getICInfo(void* rtn_addr);
......
......@@ -1786,7 +1786,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
return rewrite->getTypeRecorder();
}
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs)
Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs)
: rewrite(std::move(rewrite)),
assembler(this->rewrite->getAssembler()),
picked_slot(NULL),
......@@ -1837,6 +1837,11 @@ Rewriter::Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const s
var->locations.push_back(l);
}
// Make sure there weren't duplicates in the live_outs list.
// Probably not a big deal if there were, but we shouldn't be generating those.
assert(std::find(this->live_out_regs.begin(), this->live_out_regs.end(), dwarf_regnum)
== this->live_out_regs.end());
this->live_outs.push_back(var);
this->live_out_regs.push_back(dwarf_regnum);
}
......@@ -2034,8 +2039,8 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) {
}
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped) {
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped) {
assert(start_addr < end_addr);
int est_slowpath_size = INITIAL_CALL_SIZE;
......@@ -2043,8 +2048,6 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
std::vector<assembler::GenericRegister> regs_to_spill;
std::vector<assembler::Register> regs_to_reload;
std::unordered_set<int> live_outs_for_slot;
for (int dwarf_regnum : live_outs) {
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
......@@ -2052,7 +2055,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
if (ru.type == assembler::GenericRegister::GP) {
if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) {
live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);
continue;
}
}
......@@ -2068,7 +2071,7 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
continue;
}
live_outs_for_slot.insert(dwarf_regnum);
live_outs.set(dwarf_regnum);
regs_to_spill.push_back(ru);
......@@ -2125,7 +2128,6 @@ PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t*
assem.fillWithNops();
assert(!assem.hasFailed());
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr,
std::move(live_outs_for_slot));
return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr, std::move(live_outs));
}
}
......@@ -355,7 +355,7 @@ protected:
llvm::SmallVector<RewriterVar*, 8> args;
llvm::SmallVector<RewriterVar*, 8> live_outs;
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const std::vector<int>& live_outs);
Rewriter(std::unique_ptr<ICSlotRewrite> rewrite, int num_args, const LiveOutSet& live_outs);
llvm::SmallVector<RewriterAction, 32> actions;
void addAction(std::function<void()> action, llvm::ArrayRef<RewriterVar*> vars, ActionType type) {
......@@ -555,10 +555,10 @@ struct PatchpointInitializationInfo {
uint8_t* slowpath_start;
uint8_t* slowpath_rtn_addr;
uint8_t* continue_addr;
std::unordered_set<int> live_outs;
LiveOutSet live_outs;
PatchpointInitializationInfo(uint8_t* slowpath_start, uint8_t* slowpath_rtn_addr, uint8_t* continue_addr,
std::unordered_set<int>&& live_outs)
LiveOutSet live_outs)
: slowpath_start(slowpath_start),
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr),
......@@ -566,8 +566,8 @@ struct PatchpointInitializationInfo {
};
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped);
int scratch_offset, int scratch_size, LiveOutSet live_outs,
SpillMap& remapped);
template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) {
return getAttr(offset, loc, assembler::MovType::ZBL);
......
......@@ -102,7 +102,7 @@ std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, in
int scratch_offset = num_stack_args * 8;
StackInfo stack_info(scratch_size, scratch_offset);
std::unordered_set<int> live_outs;
LiveOutSet live_outs;
void* fragment_start = a.curInstPointer() - patch_jump_offset;
long fragment_offset = a.bytesWritten() - patch_jump_offset;
......@@ -612,13 +612,13 @@ int JitFragmentWriter::finishCompilation() {
uint8_t* end_addr = pp_info.end_addr;
PatchpointInitializationInfo initialization_info
= initializePatchpoint3(pp_info.func_addr, start_addr, end_addr, 0 /* scratch_offset */,
0 /* scratch_size */, std::unordered_set<int>(), _spill_map);
0 /* scratch_size */, LiveOutSet(), _spill_map);
uint8_t* slowpath_start = initialization_info.slowpath_start;
uint8_t* slowpath_rtn_addr = initialization_info.slowpath_rtn_addr;
std::unique_ptr<ICInfo> pp = registerCompiledPatchpoint(
start_addr, slowpath_start, initialization_info.continue_addr, slowpath_rtn_addr, pp_info.ic.get(),
pp_info.stack_info, std::unordered_set<int>());
std::unique_ptr<ICInfo> pp
= registerCompiledPatchpoint(start_addr, slowpath_start, initialization_info.continue_addr,
slowpath_rtn_addr, pp_info.ic.get(), pp_info.stack_info, LiveOutSet());
pp.release();
}
......
......@@ -247,7 +247,7 @@ ScopeInfo* IRGenState::getScopeInfoForNode(AST* node) {
class IREmitterImpl : public IREmitter {
private:
IRGenState* irstate;
IRBuilder* builder;
std::unique_ptr<IRBuilder> builder;
llvm::BasicBlock*& curblock;
IRGenerator* irgenerator;
......@@ -348,6 +348,9 @@ private:
public:
explicit IREmitterImpl(IRGenState* irstate, llvm::BasicBlock*& curblock, IRGenerator* irgenerator)
: irstate(irstate), builder(new IRBuilder(g.context)), curblock(curblock), irgenerator(irgenerator) {
// Perf note: it seems to be more efficient to separately allocate the "builder" member,
// even though we could allocate it in-line; maybe it's infrequently used enough that it's better
// to not have it take up cache space.
RELEASE_ASSERT(irstate->getSourceInfo()->scoping->areGlobalsFromModule(),
"jit doesn't support custom globals yet");
......@@ -356,7 +359,7 @@ public:
builder->SetInsertPoint(curblock);
}
IRBuilder* getBuilder() override { return builder; }
IRBuilder* getBuilder() override { return &*builder; }
GCBuilder* getGC() override { return irstate->getGC(); }
......@@ -557,8 +560,6 @@ public:
types(types),
state(RUNNING) {}
~IRGeneratorImpl() { delete emitter.getBuilder(); }
private:
OpInfo getOpInfoForNode(AST* ast, const UnwindInfo& unw_info) {
assert(ast);
......
......@@ -117,14 +117,14 @@ static int extractScratchOffset(PatchpointInfo* pp, StackMap::Record* r) {
return l.offset;
}
static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
std::unordered_set<int> live_outs;
static LiveOutSet extractLiveOuts(StackMap::Record* r, llvm::CallingConv::ID cc) {
LiveOutSet live_outs;
// Using the C calling convention, there shouldn't be any non-callee-save registers in here,
// but LLVM is conservative and will add some. So with the C cc, ignored the specified live outs
if (cc != llvm::CallingConv::C) {
for (const auto& live_out : r->live_outs) {
live_outs.insert(live_out.regnum);
live_outs.set(live_out.regnum);
}
}
......@@ -133,11 +133,11 @@ static std::unordered_set<int> extractLiveOuts(StackMap::Record* r, llvm::Callin
// sense to track them as live_outs.
// Unfortunately this means we need to be conservative about it unless
// we can change llvm's behavior.
live_outs.insert(3);
live_outs.insert(12);
live_outs.insert(13);
live_outs.insert(14);
live_outs.insert(15);
live_outs.set(3);
live_outs.set(12);
live_outs.set(13);
live_outs.set(14);
live_outs.set(15);
return live_outs;
}
......@@ -212,12 +212,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
if (ic == NULL) {
// We have to be using the C calling convention here, so we don't need to check the live outs
// or save them across the call.
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size,
std::unordered_set<int>(), frame_remapped);
initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, LiveOutSet(),
frame_remapped);
continue;
}
std::unordered_set<int> live_outs(extractLiveOuts(r, ic->getCallingConvention()));
LiveOutSet live_outs(extractLiveOuts(r, ic->getCallingConvention()));
if (ic->hasReturnValue()) {
assert(ic->getCallingConvention() == llvm::CallingConv::C
......@@ -226,14 +226,12 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
static const int DWARF_RAX = 0;
// It's possible that the return value doesn't get used, in which case
// we can avoid copying back into RAX at the end
if (live_outs.count(DWARF_RAX)) {
live_outs.erase(DWARF_RAX);
}
live_outs.clear(DWARF_RAX);
}
auto initialization_info = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset,
scratch_size, live_outs, frame_remapped);
scratch_size, std::move(live_outs), frame_remapped);
ASSERT(initialization_info.slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
"Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
......
......@@ -128,6 +128,62 @@ template <class T1, class T2> bool sameKeyset(T1* lhs, T2* rhs) {
}
return good;
}
// A simple constant-width bitset.
template <int N> struct BitSet {
uint16_t bits = 0;
static_assert(N <= 8 * sizeof(bits), "");
struct iterator {
const BitSet& set;
int cur;
iterator(const BitSet& set, int cur) : set(set), cur(cur) {}
int operator*() {
assert(cur >= 0 && cur < N);
return cur;
}
bool operator==(const iterator& rhs) { return cur == rhs.cur; }
bool operator!=(const iterator& rhs) { return !(*this == rhs); }
iterator& operator++() {
// TODO: this function (and begin()) could be optimized using __builtin_ctz
assert(cur >= 0 && cur < N);
uint16_t tmp = set.bits;
tmp >>= cur + 1;
cur++;
while (cur < N) {
if (tmp & 1)
return *this;
cur++;
tmp >>= 1;
}
assert(cur == N);
return *this;
}
};
void set(int idx) {
assert(idx >= 0 && idx < N);
bits |= (1 << idx);
}
void clear(int idx) {
assert(idx >= 0 && idx < N);
bits &= ~(1 << idx);
}
iterator begin() const {
uint16_t tmp = bits;
for (int i = 0; i < N; i++) {
if (tmp & 1)
return iterator(*this, i);
}
return iterator(*this, N);
}
iterator end() const { return iterator(*this, N); }
};
}
#endif
......@@ -273,9 +273,8 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
SpillMap _spill_map;
PatchpointInitializationInfo initialization_info
= initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */,
std::unordered_set<int>(), _spill_map);
PatchpointInitializationInfo initialization_info = initializePatchpoint3(
func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */, LiveOutSet(), _spill_map);
assert(_spill_map.size() == 0);
assert(initialization_info.slowpath_start == pp_start + patchable_size);
assert(initialization_info.slowpath_rtn_addr == pp_end);
......@@ -283,7 +282,7 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
StackInfo stack_info(SCRATCH_BYTES, 0);
icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
stack_info, std::unordered_set<int>());
stack_info, LiveOutSet());
assembler::Assembler prologue_assem((uint8_t*)addr, PROLOGUE_SIZE);
#if RUNTIMEICS_OMIT_FRAME_PTR
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment