Commit 4db48f10 authored by Kevin Modzelewski's avatar Kevin Modzelewski

Merge pull request #656 from tjhance/fix-lea

Fix lea
parents b7873045 227d9e2b
// Copyright (c) 2014-2015 Dropbox, Inc. // Copyright (c) 2014-2015 Dropbox, Inc. //
//
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
// You may obtain a copy of the License at // You may obtain a copy of the License at
...@@ -248,6 +247,18 @@ void Rewriter::restoreArgs() { ...@@ -248,6 +247,18 @@ void Rewriter::restoreArgs() {
} }
} }
for (int i = 0; i < live_outs.size(); i++) {
assembler::GenericRegister gr = assembler::GenericRegister::fromDwarf(live_out_regs[i]);
if (gr.type == assembler::GenericRegister::GP) {
assembler::Register r = gr.gp;
if (!live_outs[i]->isInLocation(Location(r))) {
allocReg(r);
live_outs[i]->getInReg(r);
assert(live_outs[i]->isInLocation(r));
}
}
}
assertArgsInPlace(); assertArgsInPlace();
} }
...@@ -257,6 +268,10 @@ void Rewriter::assertArgsInPlace() { ...@@ -257,6 +268,10 @@ void Rewriter::assertArgsInPlace() {
for (int i = 0; i < args.size(); i++) { for (int i = 0; i < args.size(); i++) {
assert(args[i]->isInLocation(args[i]->arg_loc)); assert(args[i]->isInLocation(args[i]->arg_loc));
} }
for (int i = 0; i < live_outs.size(); i++) {
assembler::GenericRegister r = assembler::GenericRegister::fromDwarf(live_out_regs[i]);
assert(live_outs[i]->isInLocation(r));
}
} }
void RewriterVar::addGuard(uint64_t val) { void RewriterVar::addGuard(uint64_t val) {
...@@ -272,8 +287,6 @@ void Rewriter::_addGuard(RewriterVar* var, RewriterVar* val_constant) { ...@@ -272,8 +287,6 @@ void Rewriter::_addGuard(RewriterVar* var, RewriterVar* val_constant) {
assert(val_constant->is_constant); assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value; uint64_t val = val_constant->constant_value;
restoreArgs();
assembler::Register var_reg = var->getInReg(); assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) { if (isLargeConstant(val)) {
assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg); assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg);
...@@ -282,6 +295,7 @@ void Rewriter::_addGuard(RewriterVar* var, RewriterVar* val_constant) { ...@@ -282,6 +295,7 @@ void Rewriter::_addGuard(RewriterVar* var, RewriterVar* val_constant) {
assembler->cmp(var_reg, assembler::Immediate(val)); assembler->cmp(var_reg, assembler::Immediate(val));
} }
restoreArgs(); // can only do movs, doesn't affect flags, so it's safe
assertArgsInPlace(); assertArgsInPlace();
assembler->jne(assembler::JumpDestination::fromStart(rewrite->getSlotSize())); assembler->jne(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
...@@ -304,8 +318,6 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, RewriterVar* val_constant) { ...@@ -304,8 +318,6 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, RewriterVar* val_constant) {
assert(val_constant->is_constant); assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value; uint64_t val = val_constant->constant_value;
restoreArgs();
assembler::Register var_reg = var->getInReg(); assembler::Register var_reg = var->getInReg();
if (isLargeConstant(val)) { if (isLargeConstant(val)) {
assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg); assembler::Register reg = val_constant->getInReg(Location::any(), true, /* otherThan */ var_reg);
...@@ -314,6 +326,7 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, RewriterVar* val_constant) { ...@@ -314,6 +326,7 @@ void Rewriter::_addGuardNotEq(RewriterVar* var, RewriterVar* val_constant) {
assembler->cmp(var_reg, assembler::Immediate(val)); assembler->cmp(var_reg, assembler::Immediate(val));
} }
restoreArgs(); // can only do movs, doesn't affect flags, so it's safe
assertArgsInPlace(); assertArgsInPlace();
assembler->je(assembler::JumpDestination::fromStart(rewrite->getSlotSize())); assembler->je(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
...@@ -339,8 +352,6 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_cons ...@@ -339,8 +352,6 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_cons
assert(val_constant->is_constant); assert(val_constant->is_constant);
uint64_t val = val_constant->constant_value; uint64_t val = val_constant->constant_value;
restoreArgs();
// TODO if var is a constant, we will end up emitting something like // TODO if var is a constant, we will end up emitting something like
// mov $0x123, %rax // mov $0x123, %rax
// cmp $0x10(%rax), %rdi // cmp $0x10(%rax), %rdi
...@@ -365,6 +376,7 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_cons ...@@ -365,6 +376,7 @@ void Rewriter::_addAttrGuard(RewriterVar* var, int offset, RewriterVar* val_cons
assembler->cmp(assembler::Indirect(var_reg, offset), assembler::Immediate(val)); assembler->cmp(assembler::Indirect(var_reg, offset), assembler::Immediate(val));
} }
restoreArgs(); // can only do movs, doesn't affect flags, so it's safe
assertArgsInPlace(); assertArgsInPlace();
if (negate) if (negate)
assembler->je(assembler::JumpDestination::fromStart(rewrite->getSlotSize())); assembler->je(assembler::JumpDestination::fromStart(rewrite->getSlotSize()));
...@@ -593,6 +605,8 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_ ...@@ -593,6 +605,8 @@ assembler::Register RewriterVar::getInReg(Location dest, bool allow_constant_in_
assembler::Register dest_reg = dest.asRegister(); assembler::Register dest_reg = dest.asRegister();
assert(dest_reg != reg); // should have been caught by the previous case assert(dest_reg != reg); // should have been caught by the previous case
rewriter->allocReg(dest);
rewriter->assembler->mov(reg, dest_reg); rewriter->assembler->mov(reg, dest_reg);
rewriter->addLocationToVar(this, dest_reg); rewriter->addLocationToVar(this, dest_reg);
return dest_reg; return dest_reg;
...@@ -1057,6 +1071,13 @@ void Rewriter::commit() { ...@@ -1057,6 +1071,13 @@ void Rewriter::commit() {
for (int i = 0; i < live_outs.size(); i++) { for (int i = 0; i < live_outs.size(); i++) {
live_outs[i]->uses.push_back(actions.size()); live_outs[i]->uses.push_back(actions.size());
} }
for (RewriterVar* var : vars) {
// Add a use for every constant. This helps make constants available for the lea stuff
// But since "spilling" a constant has no cost, it shouldn't add register pressure.
if (var->is_constant) {
var->uses.push_back(actions.size());
}
}
assertConsistent(); assertConsistent();
...@@ -1134,12 +1155,18 @@ void Rewriter::commit() { ...@@ -1134,12 +1155,18 @@ void Rewriter::commit() {
num_as_live_out++; num_as_live_out++;
} }
} }
assert(var->next_use + num_as_live_out == var->uses.size()); assert(var->next_use + num_as_live_out + (var->is_constant ? 1 : 0) == var->uses.size());
} }
#endif #endif
assert(live_out_regs.size() == live_outs.size()); assert(live_out_regs.size() == live_outs.size());
for (RewriterVar* var : vars) {
if (var->is_constant) {
var->bumpUse();
}
}
// Live-outs placement: sometimes a live out can be placed into the location of a different live-out, // Live-outs placement: sometimes a live out can be placed into the location of a different live-out,
// so we need to reshuffle and solve those conflicts. // so we need to reshuffle and solve those conflicts.
// For now, just use a simple approach, and iteratively try to move variables into place, and skip // For now, just use a simple approach, and iteratively try to move variables into place, and skip
...@@ -1956,9 +1983,9 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) { ...@@ -1956,9 +1983,9 @@ void setSlowpathFunc(uint8_t* pp_addr, void* func) {
*(void**)&pp_addr[2] = func; *(void**)&pp_addr[2] = func;
} }
std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr, PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size, int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped) { const std::unordered_set<int>& live_outs, SpillMap& remapped) {
assert(start_addr < end_addr); assert(start_addr < end_addr);
int est_slowpath_size = INITIAL_CALL_SIZE; int est_slowpath_size = INITIAL_CALL_SIZE;
...@@ -1966,14 +1993,18 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t ...@@ -1966,14 +1993,18 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
std::vector<assembler::GenericRegister> regs_to_spill; std::vector<assembler::GenericRegister> regs_to_spill;
std::vector<assembler::Register> regs_to_reload; std::vector<assembler::Register> regs_to_reload;
std::unordered_set<int> live_outs_for_slot;
for (int dwarf_regnum : live_outs) { for (int dwarf_regnum : live_outs) {
assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum); assembler::GenericRegister ru = assembler::GenericRegister::fromDwarf(dwarf_regnum);
assert(!(ru.type == assembler::GenericRegister::GP && ru.gp == assembler::R11) && "We assume R11 is free!"); assert(!(ru.type == assembler::GenericRegister::GP && ru.gp == assembler::R11) && "We assume R11 is free!");
if (ru.type == assembler::GenericRegister::GP) { if (ru.type == assembler::GenericRegister::GP) {
if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) if (ru.gp == assembler::RSP || ru.gp.isCalleeSave()) {
live_outs_for_slot.insert(dwarf_regnum);
continue; continue;
}
} }
// Location(ru).dump(); // Location(ru).dump();
...@@ -1983,9 +2014,12 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t ...@@ -1983,9 +2014,12 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
regs_to_reload.push_back(ru.gp); regs_to_reload.push_back(ru.gp);
est_slowpath_size += 7; // 7 bytes for a single mov est_slowpath_size += 7; // 7 bytes for a single mov
continue; continue;
} }
live_outs_for_slot.insert(dwarf_regnum);
regs_to_spill.push_back(ru); regs_to_spill.push_back(ru);
if (ru.type == assembler::GenericRegister::GP) if (ru.type == assembler::GenericRegister::GP)
...@@ -2014,9 +2048,22 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t ...@@ -2014,9 +2048,22 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
// if (regs_to_spill.size()) // if (regs_to_spill.size())
// assem.trap(); // assem.trap();
assem.emitBatchPush(scratch_offset, scratch_size, regs_to_spill); assem.emitBatchPush(scratch_offset, scratch_size, regs_to_spill);
uint8_t* rtn = assem.emitCall(slowpath_func, assembler::R11); uint8_t* slowpath_rtn_addr = assem.emitCall(slowpath_func, assembler::R11);
assem.emitBatchPop(scratch_offset, scratch_size, regs_to_spill); assem.emitBatchPop(scratch_offset, scratch_size, regs_to_spill);
// The place we should continue if we took a fast path.
// If we have to reload things, make sure to set it to the beginning
// of the reloading section.
// If there's nothing to reload, as a small optimization, set it to the end of
// the patchpoint, past any nops.
// (Actually I think the calculations of the size above were exact so there should
// always be 0 nops, but this optimization shouldn't hurt.)
uint8_t* continue_addr;
if (regs_to_reload.empty())
continue_addr = end_addr;
else
continue_addr = assem.curInstPointer();
for (assembler::Register r : regs_to_reload) { for (assembler::Register r : regs_to_reload) {
StackMap::Record::Location& l = remapped[r]; StackMap::Record::Location& l = remapped[r];
assert(l.type == StackMap::Record::Location::LocationType::Indirect); assert(l.type == StackMap::Record::Location::LocationType::Indirect);
...@@ -2028,6 +2075,7 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t ...@@ -2028,6 +2075,7 @@ std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t
assem.fillWithNops(); assem.fillWithNops();
assert(!assem.hasFailed()); assert(!assem.hasFailed());
return std::make_pair(slowpath_start, rtn); return PatchpointInitializationInfo(slowpath_start, slowpath_rtn_addr, continue_addr,
std::move(live_outs_for_slot));
} }
} }
...@@ -539,10 +539,23 @@ typedef std::map<assembler::GenericRegister, StackMap::Record::Location, GRCompa ...@@ -539,10 +539,23 @@ typedef std::map<assembler::GenericRegister, StackMap::Record::Location, GRCompa
bool spillFrameArgumentIfNecessary(StackMap::Record::Location& l, uint8_t*& inst_addr, uint8_t* inst_end, bool spillFrameArgumentIfNecessary(StackMap::Record::Location& l, uint8_t*& inst_addr, uint8_t* inst_end,
int& scratch_offset, int& scratch_size, SpillMap& remapped); int& scratch_offset, int& scratch_size, SpillMap& remapped);
// returns (start_of_slowpath, return_addr_of_slowpath_call) struct PatchpointInitializationInfo {
std::pair<uint8_t*, uint8_t*> initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr, uint8_t* slowpath_start;
int scratch_offset, int scratch_size, uint8_t* slowpath_rtn_addr;
const std::unordered_set<int>& live_outs, SpillMap& remapped); uint8_t* continue_addr;
std::unordered_set<int> live_outs;
PatchpointInitializationInfo(uint8_t* slowpath_start, uint8_t* slowpath_rtn_addr, uint8_t* continue_addr,
std::unordered_set<int>&& live_outs)
: slowpath_start(slowpath_start),
slowpath_rtn_addr(slowpath_rtn_addr),
continue_addr(continue_addr),
live_outs(std::move(live_outs)) {}
};
PatchpointInitializationInfo initializePatchpoint3(void* slowpath_func, uint8_t* start_addr, uint8_t* end_addr,
int scratch_offset, int scratch_size,
const std::unordered_set<int>& live_outs, SpillMap& remapped);
template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) { template <> inline RewriterVar* RewriterVar::getAttrCast<bool, bool>(int offset, Location loc) {
return getAttr(offset, loc, assembler::MovType::ZBL); return getAttr(offset, loc, assembler::MovType::ZBL);
......
...@@ -235,13 +235,10 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) { ...@@ -235,13 +235,10 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
} }
auto initialization_info = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset,
scratch_size, live_outs, frame_remapped);
auto _p = initializePatchpoint3(slowpath_func, start_addr, end_addr, scratch_rbp_offset, scratch_size, ASSERT(initialization_info.slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
live_outs, frame_remapped);
uint8_t* slowpath_start = _p.first;
uint8_t* slowpath_rtn_addr = _p.second;
ASSERT(slowpath_start - start_addr >= ic->num_slots * ic->slot_size,
"Used more slowpath space than expected; change ICSetupInfo::totalSize()?"); "Used more slowpath space than expected; change ICSetupInfo::totalSize()?");
assert(pp->numICStackmapArgs() == 0); // don't do anything with these for now assert(pp->numICStackmapArgs() == 0); // don't do anything with these for now
...@@ -259,9 +256,10 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) { ...@@ -259,9 +256,10 @@ void processStackmap(CompiledFunction* cf, StackMap* stackmap) {
// (rbp - rsp) == (stack_size - 8) -- the "-8" is from the value of rbp being pushed onto the stack // (rbp - rsp) == (stack_size - 8) -- the "-8" is from the value of rbp being pushed onto the stack
int scratch_rsp_offset = scratch_rbp_offset + (stack_size - 8); int scratch_rsp_offset = scratch_rbp_offset + (stack_size - 8);
std::unique_ptr<ICInfo> icinfo std::unique_ptr<ICInfo> icinfo = registerCompiledPatchpoint(
= registerCompiledPatchpoint(start_addr, slowpath_start, end_addr, slowpath_rtn_addr, ic, start_addr, initialization_info.slowpath_start, initialization_info.continue_addr,
StackInfo(scratch_size, scratch_rsp_offset), std::move(live_outs)); initialization_info.slowpath_rtn_addr, ic, StackInfo(scratch_size, scratch_rsp_offset),
std::move(initialization_info.live_outs));
assert(cf); assert(cf);
// TODO: unsafe. hard to use a unique_ptr here though. // TODO: unsafe. hard to use a unique_ptr here though.
......
...@@ -273,12 +273,13 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R ...@@ -273,12 +273,13 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
SpillMap _spill_map; SpillMap _spill_map;
std::pair<uint8_t*, uint8_t*> p PatchpointInitializationInfo initialization_info
= initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */, = initializePatchpoint3(func_addr, pp_start, pp_end, 0 /* scratch_offset */, 0 /* scratch_size */,
std::unordered_set<int>(), _spill_map); std::unordered_set<int>(), _spill_map);
assert(_spill_map.size() == 0); assert(_spill_map.size() == 0);
assert(p.first == pp_start + patchable_size); assert(initialization_info.slowpath_start == pp_start + patchable_size);
assert(p.second == pp_end); assert(initialization_info.slowpath_rtn_addr == pp_end);
assert(initialization_info.continue_addr == pp_end);
StackInfo stack_info(SCRATCH_BYTES, 0); StackInfo stack_info(SCRATCH_BYTES, 0);
icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(), icinfo = registerCompiledPatchpoint(pp_start, pp_start + patchable_size, pp_end, pp_end, setup_info.get(),
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment