Commit cf145fb8 authored by Kevin Modzelewski

Merge pull request #617 from tjhance/lea

constant loading optimizations in rewriter
parents 3afc5d2e 25521cb7
......@@ -789,6 +789,38 @@ void Assembler::cmp(Indirect mem, Register reg) {
}
}
// Emits `lea offset(base), reg` (REX.W + 0x8D /r): computes mem.base + mem.offset into reg
// without accessing memory.
//
// Two ModRM encodings do not mean "plain base register" and are handled explicitly:
//   - r/m == 0b100 (RSP / R12 once REX.B is applied) announces a SIB byte instead of
//     naming a base, so a SIB byte selecting that base with no index is emitted.
//   - mod == 0b00 with r/m == 0b101 (RBP / R13) means RIP-relative disp32 rather than
//     "[base]", so a zero offset off those registers is encoded as mod 0b01 with disp8 = 0.
void Assembler::lea(Indirect mem, Register reg) {
    int mem_idx = mem.base.regnum;
    int reg_idx = reg.regnum;

    // Registers 8..15 are selected through the REX prefix; ModRM only carries the low 3 bits.
    int rex = REX_W;
    if (mem_idx >= 8) {
        rex |= REX_B;
        mem_idx -= 8;
    }
    if (reg_idx >= 8) {
        rex |= REX_R;
        reg_idx -= 8;
    }

    assert(mem_idx >= 0 && mem_idx < 8);
    assert(reg_idx >= 0 && reg_idx < 8);

    emitRex(rex);
    emitByte(0x8D);

    const bool needs_sib = (mem_idx == 0b100);
    const bool base_needs_disp = (mem_idx == 0b101);

    // Pick the shortest displacement form that can represent the offset.
    int mode;
    if (mem.offset == 0 && !base_needs_disp) {
        mode = 0b00;
    } else if (-0x80 <= mem.offset && mem.offset < 0x80) {
        mode = 0b01;
    } else {
        // disp32 covers the full signed 32-bit range, INT32_MAX included.
        assert(-(1L << 31) <= mem.offset && mem.offset < (1L << 31));
        mode = 0b10;
    }

    emitModRM(mode, reg_idx, mem_idx);
    if (needs_sib)
        emitByte(0x24); // SIB: scale = 1, index = 0b100 (none), base = 0b100
    if (mode == 0b01)
        emitByte(mem.offset);
    else if (mode == 0b10)
        emitInt(mem.offset, 4);
}
void Assembler::test(Register reg1, Register reg2) {
int reg1_idx = reg1.regnum;
int reg2_idx = reg2.regnum;
......
......@@ -136,6 +136,8 @@ public:
void cmp(Indirect mem, Immediate imm);
void cmp(Indirect mem, Register reg);
void lea(Indirect mem, Register reg);
void test(Register reg1, Register reg2);
void jmp_cond(JumpDestination dest, ConditionCode condition);
......
......@@ -90,9 +90,6 @@ bool Location::isClobberedByCall() const {
if (type == Scratch)
return false;
if (type == Constant)
return false;
if (type == Stack)
return false;
......@@ -115,13 +112,23 @@ void Location::dump() const {
return;
}
if (type == Constant) {
printf("imm(%d)\n", constant_val);
if (type == Stack) {
printf("stack(%d)\n", stack_offset);
return;
}
if (type == Stack) {
printf("stack(%d)\n", stack_offset);
if (type == AnyReg) {
printf("anyreg\n");
return;
}
if (type == None) {
printf("none\n");
return;
}
if (type == Uninitialized) {
printf("uninitialized\n");
return;
}
......@@ -132,16 +139,111 @@ static bool isLargeConstant(int64_t val) {
return (val < (-1L << 31) || val >= (1L << 31) - 1);
}
// Binds the loader to the Rewriter whose assembler and register bookkeeping it uses.
Rewriter::ConstLoader::ConstLoader(Rewriter* rewriter)
    : rewriter(rewriter) {
}
// Tries to get `val` into dst_reg by copying it from a register that findConst() reports
// as already holding it. Returns true if dst_reg holds the value afterwards (no mov is
// emitted when the source already is dst_reg), false if no register holds the value.
bool Rewriter::ConstLoader::tryRegRegMove(uint64_t val, assembler::Register dst_reg) {
    assert(rewriter->phase_emitting);

    bool have_source = false;
    assembler::Register source = findConst(val, have_source);
    if (!have_source)
        return false;

    if (source != dst_reg)
        rewriter->assembler->mov(source, dst_reg);
    return true;
}
// Tries to build a large constant with a single LEA relative to a register that currently
// holds another constant. Returns true if the LEA was emitted into dst_reg; returns false
// for constants that are not large or when no register is within a small offset of `val`.
bool Rewriter::ConstLoader::tryLea(uint64_t val, assembler::Register dst_reg) {
    assert(rewriter->phase_emitting);

    // Only large constants are worth deriving from another register.
    if (!isLargeConstant(val))
        return false;

    for (int reg_num = 0; reg_num < assembler::Register::numRegs(); ++reg_num) {
        assembler::Register base(reg_num);
        RewriterVar* held = rewriter->vars_by_location[base];
        if (held == NULL || !held->is_constant)
            continue;

        // LEA can only encode small displacements, so the distance has to fit.
        int64_t delta = val - held->constant_value;
        if (!isLargeConstant(delta)) {
            rewriter->assembler->lea(assembler::Indirect(base, delta), dst_reg);
            return true;
        }
    }

    // TODO: maybe add RIP relative LEA
    return false;
}
// Fallback path: loads `val` into dst_reg with a plain `mov reg, imm`.
void Rewriter::ConstLoader::moveImmediate(uint64_t val, assembler::Register dst_reg) {
    assert(rewriter->phase_emitting);

    assembler::Immediate imm(val);
    rewriter->assembler->mov(imm, dst_reg);
}
// Looks up the var registered for `val` in constToVar and returns the first register
// location it occupies, setting found_value to true. If the value has no var, or its var
// is not in any register, found_value is set to false and Register(0) is returned as a
// placeholder that must not be used.
assembler::Register Rewriter::ConstLoader::findConst(uint64_t val, bool& found_value) {
    assert(rewriter->phase_emitting);

    found_value = false;

    auto entry = constToVar.find(val);
    if (entry != constToVar.end()) {
        RewriterVar* const_var = entry->second;
        for (Location l : const_var->locations) {
            if (l.type != Location::Register)
                continue;
            found_value = true;
            return l.asRegister();
        }
    }

    return assembler::Register(0);
}
// Loads `val` into dst_reg using the first strategy that applies, in this order:
// register-to-register copy, LEA off another constant, plain immediate move.
void Rewriter::ConstLoader::loadConstIntoReg(uint64_t val, assembler::Register dst_reg) {
    assert(rewriter->phase_emitting);

    // Short-circuiting keeps the original order and stops at the first success.
    const bool loaded = tryRegRegMove(val, dst_reg) || tryLea(val, dst_reg);
    if (!loaded)
        moveImmediate(val, dst_reg);
}
// Returns a register holding `val`. A register that findConst() reports as already holding
// the value is returned as-is; otherwise a register other than `otherThan` is allocated and
// filled via LEA when possible, or an immediate move otherwise.
assembler::Register Rewriter::ConstLoader::loadConst(uint64_t val, Location otherThan) {
    assert(rewriter->phase_emitting);

    bool already_loaded = false;
    assembler::Register existing = findConst(val, already_loaded);
    if (already_loaded)
        return existing;

    assembler::Register fresh = rewriter->allocReg(Location::any(), otherThan);
    if (!tryLea(val, fresh))
        moveImmediate(val, fresh);
    return fresh;
}
void RewriterVar::addGuard(uint64_t val) {
rewriter->addAction([=]() { rewriter->_addGuard(this, val); }, { this }, ActionType::GUARD);
RewriterVar* val_var = rewriter->loadConst(val);
rewriter->addAction([=]() { rewriter->_addGuard(this, val_var); }, { this, val_var }, ActionType::GUARD);
}
void Rewriter::_addGuard(RewriterVar* var, uint64_t val) {
void Rewriter::_addGuard(RewriterVar* var, RewriterVar* val_constant) {
assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value;
assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg);
assert(reg != var_reg);
assembler->mov(assembler::Immediate(val), reg);
assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg);
assembler->cmp(var_reg, reg);
} else {
assembler->cmp(var_reg, assembler::Immediate(val));
......@@ -149,20 +251,23 @@ void Rewriter::_addGuard(RewriterVar* var, uint64_t val) {
assembler->jne(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
var->bumpUse();
val_constant->bumpUse();
assertConsistent();
}
void RewriterVar::addGuardNotEq(uint64_t val) {
rewriter->addAction([=]() { rewriter->_addGuardNotEq(this, val); }, { this }, ActionType::GUARD);
RewriterVar* val_var = rewriter->loadConst(val);
rewriter->addAction([=]() { rewriter->_addGuardNotEq(this, val_var); }, { this, val_var }, ActionType::GUARD);
}
void Rewriter::_addGuardNotEq(RewriterVar* var, uint64_t val) {
void Rewriter::_addGuardNotEq(RewriterVar* var, RewriterVar* val_constant) {
assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value;
assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg);
assert(var_reg != reg);
assembler->mov(assembler::Immediate(val), reg);
assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg);
assembler->cmp(var_reg, reg);
} else {
assembler->cmp(var_reg, assembler::Immediate(val));
......@@ -170,6 +275,7 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, uint64_t val) {
assembler->je(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
var->bumpUse();
val_constant->bumpUse();
assertConsistent();
}
......@@ -177,10 +283,15 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, uint64_t val) {
void RewriterVar::addAttrGuard(int offset, uint64_t val, bool negate) {
if (!attr_guards.insert(std::make_tuple(offset, val, negate)).second)
return; // duplicate guard detected
rewriter->addAction([=]() { rewriter->_addAttrGuard(this, offset, val, negate); }, { this }, ActionType::GUARD);
RewriterVar* val_var = rewriter->loadConst(val);
rewriter->addAction([=]() { rewriter->_addAttrGuard(this, offset, val_var, negate); }, { this, val_var },
ActionType::GUARD);
}
void Rewriter::_addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool negate) {
void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_constant, bool negate) {
assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value;
// TODO if var is a constant, we will end up emitting something like
// mov $0x123, %rax
// cmp $0x10(%rax), %rdi
......@@ -189,9 +300,17 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool ne
assembler::Register var_reg = var->getInReg(Location::any(), /* allow_constant_in_reg */ true);
if (isLargeConstant(val)) {
assembler::Register reg = allocReg(Location::any(), /* otherThan */ var_reg);
assert(reg != var_reg);
assembler->mov(assembler::Immediate(val), reg);
assembler::Register reg(0);
if (val_constant == var) {
// TODO This case actually shows up, but it's stuff like guarding that type_cls->cls == type_cls
// I think we can optimize this case out, and in general, we can probably optimize out
// any case where var is constant.
reg = var_reg;
} else {
reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg);
}
assembler->cmp(assembler::Indirect(var_reg, offset), reg);
} else {
assembler->cmp(assembler::Indirect(var_reg, offset), assembler::Immediate(val));
......@@ -202,6 +321,7 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool ne
assembler->jne(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
var->bumpUse();
val_constant->bumpUse();
assertConsistent();
}
......@@ -356,31 +476,33 @@ void RewriterVar::dump() {
}
assembler::Immediate RewriterVar::tryGetAsImmediate(bool* is_immediate) {
for (Location l : locations) {
if (l.type == Location::Constant) {
*is_immediate = true;
return assembler::Immediate(l.constant_val);
}
if (this->is_constant && !isLargeConstant(this->constant_value)) {
*is_immediate = true;
return assembler::Immediate(this->constant_value);
} else {
*is_immediate = false;
return assembler::Immediate((uint64_t)0);
}
*is_immediate = false;
return assembler::Immediate((uint64_t)0);
}
assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_reg, Location otherThan) {
assert(dest.type == Location::Register || dest.type == Location::AnyReg);
// assembler::Register reg = var->rewriter->allocReg(l);
// var->rewriter->addLocationToVar(var, reg);
// return reg;
assert(locations.size());
#ifndef NDEBUG
if (!allow_constant_in_reg) {
for (Location l : locations) {
ASSERT(l.type != Location::Constant, "why do you want this in a register?");
}
assert(!is_constant || isLargeConstant(constant_value));
}
#endif
if (locations.size() == 0 && this->is_constant) {
assembler::Register reg = rewriter->allocReg(dest, otherThan);
rewriter->const_loader.loadConstIntoReg(this->constant_value, reg);
rewriter->addLocationToVar(this, reg);
return reg;
}
assert(locations.size());
// Not sure if this is worth it,
// but first try to see if we're already in this specific register
for (Location l : locations) {
......@@ -415,9 +537,7 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
assembler::Register reg = rewriter->allocReg(dest, otherThan);
assert(rewriter->vars_by_location.count(reg) == 0);
if (l.type == Location::Constant) {
rewriter->assembler->mov(assembler::Immediate(l.constant_val), reg);
} else if (l.type == Location::Scratch || l.type == Location::Stack) {
if (l.type == Location::Scratch || l.type == Location::Stack) {
assembler::Indirect mem = rewriter->indirectFor(l);
rewriter->assembler->mov(mem, reg);
} else {
......@@ -431,12 +551,8 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
assembler::XMMRegister RewriterVar::getInXMMReg(Location dest) {
assert(dest.type == Location::XMMRegister || dest.type == Location::AnyReg);
assert(!this->is_constant);
assert(locations.size());
#ifndef NDEBUG
for (Location l : locations) {
ASSERT(l.type != Location::Constant, "why do you want this in a register?");
}
#endif
// Not sure if this is worth it,
// but first try to see if we're already in this specific register
......@@ -498,28 +614,11 @@ void Rewriter::_trap() {
}
RewriterVar* Rewriter::loadConst(int64_t val, Location dest) {
if (!isLargeConstant(val)) {
Location l(Location::Constant, val);
RewriterVar*& var = vars_by_location[l];
if (!var) {
var = createNewVar();
var->locations.insert(l);
}
return var;
} else {
RewriterVar* result = createNewVar();
addAction([=]() { this->_loadConst(result, val, dest); }, {}, ActionType::NORMAL);
return result;
RewriterVar*& const_loader_var = const_loader.constToVar[val];
if (!const_loader_var) {
const_loader_var = createNewConstantVar(val);
}
}
void Rewriter::_loadConst(RewriterVar* result, int64_t val, Location dest) {
assembler::Register reg = allocReg(dest);
assembler->mov(assembler::Immediate(val), reg);
result->initializeInReg(reg);
result->releaseIfNoUses();
assertConsistent();
return const_loader_var;
}
RewriterVar* Rewriter::call(bool can_call_into_python, void* func_addr) {
......@@ -731,7 +830,7 @@ void Rewriter::_call(RewriterVar* result, bool can_call_into_python, void* func_
}
#endif
assembler->mov(assembler::Immediate(func_addr), r);
const_loader.loadConstIntoReg((uint64_t)func_addr, r);
assembler->callq(r);
assert(vars_by_location.count(assembler::RAX) == 0);
......@@ -1179,7 +1278,8 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
assert(var);
// There may be no need to spill if the var is held in a different location already.
if (var->locations.size() > 1) {
// There is no need to spill if it is a constant
if (var->locations.size() > 1 || var->is_constant) {
removeLocationFromVar(var, reg);
return;
}
......@@ -1194,6 +1294,7 @@ void Rewriter::spillRegister(assembler::Register reg, Location preserve) {
continue;
assembler->mov(reg, new_reg);
addLocationToVar(var, new_reg);
removeLocationFromVar(var, reg);
return;
......@@ -1234,6 +1335,8 @@ assembler::Register Rewriter::allocReg(Location dest, Location otherThan) {
int best = -1;
bool found = false;
assembler::Register best_reg(0);
// TODO prioritize spilling a constant register?
for (assembler::Register reg : allocatable_regs) {
if (Location(reg) != otherThan) {
if (vars_by_location.count(reg) == 0) {
......@@ -1307,14 +1410,19 @@ void Rewriter::addLocationToVar(RewriterVar* var, Location l) {
var->locations.insert(l);
vars_by_location[l] = var;
#ifndef NDEBUG
// Check that the var is not in more than one of: stack, scratch, const
int count = 0;
if (var->is_constant && !isLargeConstant(var->constant_value)) {
count++;
}
for (Location l : var->locations) {
if (l.type == Location::Stack || l.type == Location::Scratch || l.type == Location::Constant) {
if (l.type == Location::Stack || l.type == Location::Scratch) {
count++;
}
}
assert(count <= 1);
#endif
}
void Rewriter::removeLocationFromVar(RewriterVar* var, Location l) {
......@@ -1333,6 +1441,13 @@ RewriterVar* Rewriter::createNewVar() {
return var;
}
// Creates a var through createNewVar() and tags it as a constant with the given value.
// No location is assigned here.
RewriterVar* Rewriter::createNewConstantVar(uint64_t val) {
    RewriterVar* const_var = createNewVar();
    const_var->constant_value = val;
    const_var->is_constant = true;
    return const_var;
}
assembler::Register RewriterVar::initializeInReg(Location l) {
rewriter->assertPhaseEmitting();
......@@ -1382,6 +1497,7 @@ TypeRecorder* Rewriter::getTypeRecorder() {
Rewriter::Rewriter(ICSlotRewrite* rewrite, int num_args, const std::vector<int>& live_outs)
: rewrite(rewrite),
assembler(rewrite->getAssembler()),
const_loader(this),
return_location(rewrite->returnRegister()),
added_changing_action(false),
marked_inside_ic(false),
......
......@@ -19,6 +19,7 @@
#include <memory>
#include <tuple>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "asm_writing/assembler.h"
......@@ -45,7 +46,6 @@ public:
Scratch, // stack location, relative to the scratch start
// For representing constants that fit in 32-bits, that can be encoded as immediates
Constant,
AnyReg, // special type for use when specifying a location as a destination
None, // special type that represents the lack of a location, ex where a "ret void" gets returned
Uninitialized, // special type for an uninitialized (and invalid) location
......@@ -64,9 +64,6 @@ public:
// only valid if type == Scratch; offset from the beginning of the scratch area
int32_t scratch_offset;
// only valid if type==Constant
int32_t constant_val;
int32_t _data;
};
......@@ -121,8 +118,8 @@ namespace pyston {
// Replacement for unordered_map<Location, T>
template <class T> class LocMap {
private:
static const int N_REGS = 16;
static const int N_XMM = 16;
static const int N_REGS = assembler::Register::numRegs();
static const int N_XMM = assembler::XMMRegister::numRegs();
static const int N_SCRATCH = 32;
static const int N_STACK = 16;
......@@ -130,7 +127,6 @@ private:
T map_xmm[N_XMM];
T map_scratch[N_SCRATCH];
T map_stack[N_STACK];
std::unordered_map<int32_t, T> map_const;
public:
LocMap() {
......@@ -158,8 +154,6 @@ public:
assert(0 <= l.scratch_offset / 8);
assert(l.scratch_offset / 8 < N_SCRATCH);
return map_scratch[l.scratch_offset / 8];
case Location::Constant:
return map_const[l.constant_val];
default:
RELEASE_ASSERT(0, "%d", l.type);
}
......@@ -196,11 +190,6 @@ public:
m.emplace(Location(Location::Stack, i * 8), map_stack[i]);
}
}
for (std::pair<int32_t, RewriterVar*> p : map_const) {
if (p.second != NULL) {
m.emplace(Location(Location::Constant, p.first), p.second);
}
}
return m;
}
#endif
......@@ -253,6 +242,9 @@ private:
bool is_arg;
Location arg_loc;
bool is_constant;
uint64_t constant_value;
llvm::SmallSet<std::tuple<int, uint64_t, bool>, 4> attr_guards; // used to detect duplicate guards
// Gets a copy of this variable in a register, spilling/reloading if necessary.
......@@ -278,7 +270,7 @@ public:
static int nvars;
#endif
RewriterVar(Rewriter* rewriter) : rewriter(rewriter), next_use(0), is_arg(false) {
RewriterVar(Rewriter* rewriter) : rewriter(rewriter), next_use(0), is_arg(false), is_constant(false) {
#ifndef NDEBUG
nvars++;
#endif
......@@ -306,9 +298,36 @@ enum class ActionType { NORMAL, GUARD, MUTATION };
class Rewriter : public ICSlotRewrite::CommitHook {
private:
// Helps generate the best code for loading a constant integer value into a register.
// It reuses registers that the rewriter already records as holding a constant: either
// the same value (register-to-register move) or a nearby one (LEA with a small offset),
// and falls back to a plain immediate move otherwise.
class ConstLoader {
private:
    // The rewriter whose assembler and location bookkeeping are used; not owned.
    Rewriter* rewriter;

    // Copies val from a register that already holds it; returns false if there is none.
    bool tryRegRegMove(uint64_t val, assembler::Register dst_reg);
    // Derives a large val from another constant register via LEA; returns false if not possible.
    bool tryLea(uint64_t val, assembler::Register dst_reg);
    // Unconditionally emits `mov reg, imm`.
    void moveImmediate(uint64_t val, assembler::Register dst_reg);

public:
    ConstLoader(Rewriter* rewriter);

    // Searches whether the specified value is already loaded into a register and, if so,
    // returns that register. found_value reports whether the returned register is valid.
    assembler::Register findConst(uint64_t val, bool& found_value);

    // Loads the constant into the specified register
    void loadConstIntoReg(uint64_t val, assembler::Register reg);

    // Loads the constant into any register, or if it is already in a register just returns it
    assembler::Register loadConst(uint64_t val, Location otherThan = Location::any());

    // Maps each constant value to the var that represents it.
    // NOTE(review): llvm::DenseMap reserves two key values as empty/tombstone sentinels
    // (for uint64_t these are ~0ULL and ~0ULL - 1); confirm that constants such as -1 and -2
    // are never used as keys here.
    llvm::DenseMap<uint64_t, RewriterVar*> constToVar;
};
std::unique_ptr<ICSlotRewrite> rewrite;
assembler::Assembler* assembler;
ConstLoader const_loader;
std::vector<RewriterVar*> vars;
const Location return_location;
......@@ -379,6 +398,8 @@ private:
// Create a new var with no location.
RewriterVar* createNewVar();
RewriterVar* createNewConstantVar(uint64_t val);
// Do the bookkeeping to say that var is now also in location l
void addLocationToVar(RewriterVar* var, Location l);
// Do the bookkeeping to say that var is no longer in location l
......@@ -387,7 +408,7 @@ private:
bool finishAssembly(ICSlotInfo* picked_slot, int continue_offset) override;
void _trap();
void _loadConst(RewriterVar* result, int64_t val, Location loc);
void _loadConst(RewriterVar* result, int64_t val);
void _call(RewriterVar* result, bool can_call_into_python, void* func_addr, const RewriterVar::SmallVector& args,
const RewriterVar::SmallVector& args_xmm);
void _add(RewriterVar* result, RewriterVar* a, int64_t b, Location dest);
......@@ -396,9 +417,9 @@ private:
void _allocateAndCopyPlus1(RewriterVar* result, RewriterVar* first_elem, RewriterVar* rest, int n_rest);
// The public versions of these are in RewriterVar
void _addGuard(RewriterVar* var, uint64_t val);
void _addGuardNotEq(RewriterVar* var, uint64_t val);
void _addAttrGuard(RewriterVar* var, int offset, uint64_t val, bool negate = false);
void _addGuard(RewriterVar* var, RewriterVar* val_constant);
void _addGuardNotEq(RewriterVar* var, RewriterVar* val_constant);
void _addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_constant, bool negate = false);
void _getAttr(RewriterVar* result, RewriterVar* var, int offset, Location loc = Location::any(),
assembler::MovType type = assembler::MovType::Q);
void _getAttrFloat(RewriterVar* result, RewriterVar* var, int offset, Location loc = Location::any());
......
......@@ -47,6 +47,8 @@ struct Register {
void dump() const;
static Register fromDwarf(int dwarf_regnum);
// Number of registers of this kind; a compile-time constant so it can size tables
// and bound loops over register numbers.
static constexpr int numRegs() {
    return 16;
}
};
const Register RAX(0);
......@@ -88,6 +90,8 @@ struct XMMRegister {
bool operator!=(const XMMRegister& rhs) const { return !(*this == rhs); }
void dump() const { printf("XMM%d\n", regnum); }
// Number of registers of this kind; a compile-time constant so it can size tables.
static constexpr int numRegs() {
    return 16;
}
};
const XMMRegister XMM0(0);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment