Commit 752653f5 authored by Marius Wachtler's avatar Marius Wachtler Committed by Travis Hance

Rewriter: Optimize constant value loading

- Reuse a register if it already contains the specified value
- Generate LEA when beneficial
--> Generated code is smaller and has same or better performance
parent 46d6bba3
...@@ -789,6 +789,38 @@ void Assembler::cmp(Indirect mem, Register reg) { ...@@ -789,6 +789,38 @@ void Assembler::cmp(Indirect mem, Register reg) {
} }
} }
// Emits a 64-bit `lea` (opcode 0x8D): reg = mem.base + mem.offset.
//
// mem: base register plus signed displacement describing the address to compute.
// reg: destination register that receives the computed address.
//
// The displacement is encoded in the shortest form available (none, disp8 or
// disp32). Two ModRM special cases are handled explicitly:
//  - r/m == 0b100 (RSP/R12 as base) does not name a register but announces a
//    SIB byte, so one is emitted.
//  - mod == 0b00 with r/m == 0b101 (RBP/R13 as base) means RIP-relative disp32,
//    so a zero offset from those bases is encoded as an explicit disp8 of 0.
void Assembler::lea(Indirect mem, Register reg) {
    int mem_idx = mem.base.regnum;
    int reg_idx = reg.regnum;

    // REX.W selects the 64-bit operand size; REX.B / REX.R extend the base and
    // destination register numbers to reach r8-r15.
    int rex = REX_W;
    if (mem_idx >= 8) {
        rex |= REX_B;
        mem_idx -= 8;
    }
    if (reg_idx >= 8) {
        rex |= REX_R;
        reg_idx -= 8;
    }

    assert(mem_idx >= 0 && mem_idx < 8);
    assert(reg_idx >= 0 && reg_idx < 8);

    const bool needs_sib = (mem_idx == 0b100);
    const bool can_omit_disp = (mem.offset == 0 && mem_idx != 0b101);

    int mode;
    if (can_omit_disp) {
        mode = 0b00;
    } else if (-0x80 <= mem.offset && mem.offset < 0x80) {
        mode = 0b01;
    } else {
        // disp32 covers the full signed 32-bit range, INT32_MAX included.
        assert(-(1LL << 31) <= mem.offset && mem.offset < (1LL << 31));
        mode = 0b10;
    }

    emitRex(rex);
    emitByte(0x8D);
    emitModRM(mode, reg_idx, mem_idx);
    if (needs_sib)
        emitByte(0x24); // SIB: scale=00, index=100 (none), base=100 (RSP, or R12 with REX.B)

    if (mode == 0b01)
        emitByte(mem.offset);
    else if (mode == 0b10)
        emitInt(mem.offset, 4);
}
void Assembler::test(Register reg1, Register reg2) { void Assembler::test(Register reg1, Register reg2) {
int reg1_idx = reg1.regnum; int reg1_idx = reg1.regnum;
int reg2_idx = reg2.regnum; int reg2_idx = reg2.regnum;
......
...@@ -136,6 +136,8 @@ public: ...@@ -136,6 +136,8 @@ public:
void cmp(Indirect mem, Immediate imm); void cmp(Indirect mem, Immediate imm);
void cmp(Indirect mem, Register reg); void cmp(Indirect mem, Register reg);
void lea(Indirect mem, Register reg);
void test(Register reg1, Register reg2); void test(Register reg1, Register reg2);
void jmp_cond(JumpDestination dest, ConditionCode condition); void jmp_cond(JumpDestination dest, ConditionCode condition);
......
...@@ -136,12 +136,98 @@ void RewriterVar::addGuard(uint64_t val) { ...@@ -136,12 +136,98 @@ void RewriterVar::addGuard(uint64_t val) {
rewriter->addAction([=]() { rewriter->_addGuard(this, val); }, { this }, ActionType::GUARD); rewriter->addAction([=]() { rewriter->_addGuard(this, val); }, { this }, ActionType::GUARD);
} }
Rewriter::ConstLoader::ConstLoader(Rewriter* rewriter) : rewriter(rewriter) {
invalidateAll();
}
bool Rewriter::ConstLoader::tryRegRegMove(uint64_t val, assembler::Register dst_reg) {
// copy the value if there is a register which contains already the value
bool found_value = false;
assembler::Register src_reg = findConst(val, found_value);
if (found_value) {
if (src_reg != dst_reg)
rewriter->assembler->mov(src_reg, dst_reg);
setKnownValue(dst_reg, val);
return true;
}
return false;
}
bool Rewriter::ConstLoader::tryLea(uint64_t val, assembler::Register dst_reg) {
// for large constants it maybe beneficial to create the value with a LEA from a known const value
if (isLargeConstant(val)) {
for (int reg_num = 0; reg_num < assembler::Register::numRegs(); ++reg_num) {
if (!hasKnownValue(assembler::Register(reg_num)))
continue;
int64_t offset = val - last_known_value[reg_num];
if (isLargeConstant(offset))
continue; // LEA can only handle small offsets
rewriter->assembler->lea(assembler::Indirect(assembler::Register(reg_num), offset), dst_reg);
setKnownValue(dst_reg, val);
return true;
}
// TODO: maybe add RIP relative LEA
}
return false;
}
void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_reg) {
// fallback use a normal: mov reg, imm
rewriter->assembler->mov(assembler::Immediate(val), dst_reg);
setKnownValue(dst_reg, val);
}
// Forgets every tracked constant by resetting each per-register slot to the
// "unknown" sentinel.
void Rewriter::ConstLoader::invalidateAll() {
    for (uint64_t& tracked : last_known_value)
        tracked = unknown_value;
}
// Looks for a register recorded as holding `val`.
//
// found_value is set to true and the lowest-numbered matching register is
// returned when one exists. Otherwise found_value is set to false and the
// returned register (number 0) is a placeholder that must not be used.
// A `val` equal to the "unknown" sentinel is never reported as found.
assembler::Register Rewriter::ConstLoader::findConst(uint64_t val, bool& found_value) {
    found_value = false;

    if (val != unknown_value) {
        const int reg_count = assembler::Register::numRegs();
        for (int candidate = 0; candidate != reg_count; ++candidate) {
            if (last_known_value[candidate] != val)
                continue;
            found_value = true;
            return assembler::Register(candidate);
        }
    }

    return assembler::Register(0);
}
// Loads `val` into `dst_reg`, trying the strategies in this order:
// register-to-register copy, LEA from a known constant, plain immediate move.
void Rewriter::ConstLoader::loadConstIntoReg(uint64_t val, assembler::Register dst_reg) {
    // Short-circuit evaluation keeps the original ordering: tryLea only runs
    // when the register copy was not possible.
    const bool loaded = tryRegRegMove(val, dst_reg) || tryLea(val, dst_reg);
    if (!loaded)
        moveImmediate(val, dst_reg);
}
// Returns a register holding `val`.
//
// If a register is already recorded as holding `val` it is returned as-is and
// no code is emitted. Otherwise a register is allocated (avoiding `otherThan`)
// and filled via LEA when possible, or via an immediate move.
//
// NOTE(review): the reuse path returns before `otherThan` is consulted, so the
// returned register may equal `otherThan` -- confirm callers tolerate this.
assembler::Register Rewriter::ConstLoader::loadConst(uint64_t val, Location otherThan) {
    bool cached = false;
    assembler::Register result = findConst(val, cached);

    if (!cached) {
        result = rewriter->allocReg(Location::any(), otherThan);
        if (!tryLea(val, result))
            moveImmediate(val, result);
    }

    return result;
}
void Rewriter::_addGuard(RewriterVar* var, uint64_t val) { void Rewriter::_addGuard(RewriterVar* var, uint64_t val) {
assembler::Register var_reg = var->getInReg(); assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) { if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg); assembler::Register reg = const_loader.loadConst(val, /* otherThan */ var_reg);
assert(reg != var_reg);
assembler->mov(assembler::Immediate(val), reg);
assembler->cmp(var_reg, reg); assembler->cmp(var_reg, reg);
} else { } else {
assembler->cmp(var_reg, assembler::Immediate(val)); assembler->cmp(var_reg, assembler::Immediate(val));
...@@ -160,9 +246,7 @@ void RewriterVar::addGuardNotEq(uint64_t val) { ...@@ -160,9 +246,7 @@ void RewriterVar::addGuardNotEq(uint64_t val) {
void Rewriter::_addGuardNotEq(RewriterVar* var, uint64_t val) { void Rewriter::_addGuardNotEq(RewriterVar* var, uint64_t val) {
assembler::Register var_reg = var->getInReg(); assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) { if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg); assembler::Register reg = const_loader.loadConst(val, /* otherThan */ var_reg);
assert(var_reg != reg);
assembler->mov(assembler::Immediate(val), reg);
assembler->cmp(var_reg, reg); assembler->cmp(var_reg, reg);
} else { } else {
assembler->cmp(var_reg, assembler::Immediate(val)); assembler->cmp(var_reg, assembler::Immediate(val));
...@@ -189,9 +273,7 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool ne ...@@ -189,9 +273,7 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool ne
assembler::Register var_reg = var->getInReg(Location::any(), /* allow_constant_in_reg */ true); assembler::Register var_reg = var->getInReg(Location::any(), /* allow_constant_in_reg */ true);
if (isLargeConstant(val)) { if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg); assembler::Register reg = const_loader.loadConst(val, /* otherThan */ var_reg);
assert(reg != var_reg);
assembler->mov(assembler::Immediate(val), reg);
assembler->cmp(assembler::Indirect(var_reg, offset), reg); assembler->cmp(assembler::Indirect(var_reg, offset), reg);
} else { } else {
assembler->cmp(assembler::Indirect(var_reg, offset), assembler::Immediate(val)); assembler->cmp(assembler::Indirect(var_reg, offset), assembler::Immediate(val));
...@@ -397,6 +479,7 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_ ...@@ -397,6 +479,7 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
assert(dest_reg != reg); // should have been caught by the previous case assert(dest_reg != reg); // should have been caught by the previous case
rewriter->assembler->mov(reg, dest_reg); rewriter->assembler->mov(reg, dest_reg);
rewriter->const_loader.copy(reg, dest_reg);
rewriter->addLocationToVar(this, dest_reg); rewriter->addLocationToVar(this, dest_reg);
return dest_reg; return dest_reg;
} else { } else {
...@@ -515,7 +598,7 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) { ...@@ -515,7 +598,7 @@ RewriterVar* Rewriter::loadConst(int64_t val, Location dest) {
void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) { void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) {
assembler::Register reg = allocReg(dest); assembler::Register reg = allocReg(dest);
assembler->mov(assembler::Immediate(val), reg); const_loader.loadConstIntoReg(val, reg);
result->initializeInReg(reg); result->initializeInReg(reg);
result->releaseIfNoUses(); result->releaseIfNoUses();
...@@ -731,8 +814,9 @@ void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_ ...@@ -731,8 +814,9 @@ void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_
} }
#endif #endif
assembler->mov(assembler::Immediate(func_addr), r); const_loader.loadConstIntoReg((uint64_t)func_addr, r);
assembler->callq(r); assembler->callq(r);
const_loader.invalidateAll(); // TODO: we only need to invalidate the clobbered regs
assert(vars_by_location.count(assembler::RAX) == 0); assert(vars_by_location.count(assembler::RAX) == 0);
result->initializeInReg(assembler::RAX); result->initializeInReg(assembler::RAX);
...@@ -1194,6 +1278,8 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) { ...@@ -1194,6 +1278,8 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
continue; continue;
assembler->mov(reg, new_reg); assembler->mov(reg, new_reg);
const_loader.copy(reg, new_reg);
addLocationToVar(var, new_reg); addLocationToVar(var, new_reg);
removeLocationFromVar(var, reg); removeLocationFromVar(var, reg);
return; return;
...@@ -1234,9 +1320,21 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { ...@@ -1234,9 +1320,21 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
int best = -1; int best = -1;
bool found = false; bool found = false;
assembler::Register best_reg(0); assembler::Register best_reg(0);
// prefer registers which don't have a known const value
for (assembler::Register reg : allocatable_regs) {
if (Location(reg) != otherThan) {
if (!const_loader.hasKnownValue(reg) && vars_by_location.count(reg) == 0) {
const_loader.invalidate(reg);
return reg;
}
}
}
for (assembler::Register reg : allocatable_regs) { for (assembler::Register reg : allocatable_regs) {
if (Location(reg) != otherThan) { if (Location(reg) != otherThan) {
if (vars_by_location.count(reg) == 0) { if (vars_by_location.count(reg) == 0) {
const_loader.invalidate(reg);
return reg; return reg;
} }
RewriterVar* var = vars_by_location[reg]; RewriterVar* var = vars_by_location[reg];
...@@ -1255,6 +1353,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { ...@@ -1255,6 +1353,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
assert(found); assert(found);
spillRegister(best_reg, /* preserve */ otherThan); spillRegister(best_reg, /* preserve */ otherThan);
assert(vars_by_location.count(best_reg) == 0); assert(vars_by_location.count(best_reg) == 0);
const_loader.invalidate(best_reg);
return best_reg; return best_reg;
} else if (dest.type == Location::Register) { } else if (dest.type == Location::Register) {
assembler::Register reg(dest.regnum); assembler::Register reg(dest.regnum);
...@@ -1264,6 +1363,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) { ...@@ -1264,6 +1363,7 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
} }
assert(vars_by_location.count(reg) == 0); assert(vars_by_location.count(reg) == 0);
const_loader.invalidate(reg);
return reg; return reg;
} else { } else {
RELEASE_ASSERT(0, "%d", dest.type); RELEASE_ASSERT(0, "%d", dest.type);
...@@ -1382,6 +1482,7 @@ TypeRecorder* Rewriter::getTypeRecorder() { ...@@ -1382,6 +1482,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& live_outs) Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& live_outs)
: rewrite(rewrite), : rewrite(rewrite),
assembler(rewrite->getAssembler()), assembler(rewrite->getAssembler()),
const_loader(this),
return_location(rewrite->returnRegister()), return_location(rewrite->returnRegister()),
added_changing_action(false), added_changing_action(false),
marked_inside_ic(false), marked_inside_ic(false),
......
...@@ -121,8 +121,8 @@ namespace pyston { ...@@ -121,8 +121,8 @@ namespace pyston {
// Replacement for unordered_map<Location, T> // Replacement for unordered_map<Location, T>
template <class T> class LocMap { template <class T> class LocMap {
private: private:
static const int N_REGS = 16; static const int N_REGS = assembler::Register::numRegs();
static const int N_XMM = 16; static const int N_XMM = assembler::XMMRegister::numRegs();
static const int N_SCRATCH = 32; static const int N_SCRATCH = 32;
static const int N_STACK = 16; static const int N_STACK = 16;
...@@ -306,9 +306,44 @@ enum class ActionType { NORMAL, GUARD, MUTATION }; ...@@ -306,9 +306,44 @@ enum class ActionType { NORMAL, GUARD, MUTATION };
class Rewriter : public ICSlotRewrite::CommitHook { class Rewriter : public ICSlotRewrite::CommitHook {
private: private:
// Emits the cheapest available code for loading a constant integer into a
// register. It remembers the last constant written to each register so that
// a later load can reuse or derive from it.
//
// The value 0 doubles as the "unknown" marker, so a register holding the
// constant 0 is treated the same as one with no known value.
class ConstLoader {
private:
    // Last constant recorded per register number; unknown_value means "none".
    uint64_t last_known_value[assembler::Register::numRegs()];
    const uint64_t unknown_value = 0;
    Rewriter* rewriter;

    // Loading strategies, each recording dst_reg's new value on success.
    bool tryRegRegMove(uint64_t val, assembler::Register dst_reg);
    bool tryLea(uint64_t val, assembler::Register dst_reg);
    void moveImmediate(uint64_t val, assembler::Register dst_reg);

public:
    ConstLoader(Rewriter* rewriter);

    // True when `reg` is recorded as holding a constant.
    bool hasKnownValue(assembler::Register reg) const { return getKnownValue(reg) != unknown_value; }
    // Recorded constant for `reg` (unknown_value when nothing is recorded).
    uint64_t getKnownValue(assembler::Register reg) const { return last_known_value[reg.regnum]; }
    // Records that `reg` now holds `val`.
    void setKnownValue(assembler::Register reg, uint64_t val) { last_known_value[reg.regnum] = val; }
    // Forgets whatever was recorded for `reg`.
    void invalidate(assembler::Register reg) { last_known_value[reg.regnum] = unknown_value; }
    // Forgets the recorded value of every register.
    void invalidateAll();
    // Mirrors a register-to-register move: dst_reg takes over src_reg's record.
    void copy(assembler::Register src_reg, assembler::Register dst_reg) {
        last_known_value[dst_reg.regnum] = last_known_value[src_reg.regnum];
    }

    // Searches for a register already holding `val`; found_value reports
    // whether the returned register is meaningful.
    assembler::Register findConst(uint64_t val, bool& found_value);

    // Loads the constant into the specified register.
    void loadConstIntoReg(uint64_t val, assembler::Register reg);

    // Loads the constant into any register, or returns the register that
    // already holds it.
    assembler::Register loadConst(uint64_t val, Location otherThan = Location::any());
};
std::unique_ptr<ICSlotRewrite> rewrite; std::unique_ptr<ICSlotRewrite> rewrite;
assembler::Assembler* assembler; assembler::Assembler* assembler;
ConstLoader const_loader;
std::vector<RewriterVar*> vars; std::vector<RewriterVar*> vars;
const Location return_location; const Location return_location;
......
...@@ -47,6 +47,8 @@ struct Register { ...@@ -47,6 +47,8 @@ struct Register {
void dump() const; void dump() const;
static Register fromDwarf(int dwarf_regnum); static Register fromDwarf(int dwarf_regnum);
// Number of register indices covered by Register (16); usable in constant
// expressions, e.g. to size per-register tables.
static constexpr int numRegs() { return 16; }
}; };
const Register RAX(0); const Register RAX(0);
...@@ -88,6 +90,8 @@ struct XMMRegister { ...@@ -88,6 +90,8 @@ struct XMMRegister {
bool operator!=(const XMMRegister& rhs) const { return !(*this == rhs); } bool operator!=(const XMMRegister& rhs) const { return !(*this == rhs); }
void dump() const { printf("XMM%d\n", regnum); } void dump() const { printf("XMM%d\n", regnum); }
// Number of register indices covered by XMMRegister (16); usable in constant
// expressions, e.g. to size per-register tables.
static constexpr int numRegs() { return 16; }
}; };
const XMMRegister XMM0(0); const XMMRegister XMM0(0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment