Commit c2f6b997 authored by Marius Wachtler's avatar Marius Wachtler

EH frame: use a single allocation for the EH frame + generated code

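This removes the 2GB process-size limitation I ran into.
(With the EH frame format we are using, the code offset is specified as a 32-bit signed offset, so the EH frame must lie within 2GB of the code it describes. Placing both in the same allocation guarantees this.)
I checked whether aligning the code speeds it up, but I did not notice any performance change.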
parent 189c7ac1
......@@ -53,17 +53,20 @@ const unsigned char eh_info[]
static_assert(JitCodeBlock::num_stack_args == 2, "have to update EH table!");
static_assert(JitCodeBlock::scratch_size == 256, "have to update EH table!");
constexpr int code_size = JitCodeBlock::memory_size - sizeof(eh_info);
JitCodeBlock::JitCodeBlock(llvm::StringRef name)
: code(new uint8_t[code_size]),
eh_frame(new uint8_t[sizeof(eh_info)]),
: memory(new uint8_t[memory_size]),
entry_offset(0),
a(code.get(), code_size),
a(memory.get() + sizeof(eh_info), code_size),
is_currently_writing(false),
asm_failed(false) {
static StatCounter num_jit_code_blocks("num_baselinejit_code_blocks");
num_jit_code_blocks.log();
static StatCounter num_jit_total_bytes("num_baselinejit_total_bytes");
num_jit_total_bytes.log(code_size);
num_jit_total_bytes.log(memory_size);
uint8_t* code = a.curInstPointer();
// emit prolog
a.push(assembler::R14);
......@@ -78,20 +81,20 @@ JitCodeBlock::JitCodeBlock(llvm::StringRef name)
// generate the eh frame...
const int size = sizeof(eh_info);
void* eh_frame_addr = eh_frame.get();
void* eh_frame_addr = memory.get();
memcpy(eh_frame_addr, eh_info, size);
int32_t* offset_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x20);
int32_t* size_ptr = (int32_t*)((uint8_t*)eh_frame_addr + 0x24);
int64_t offset = (int8_t*)code.get() - (int8_t*)offset_ptr;
RELEASE_ASSERT(offset >= INT_MIN && offset <= INT_MAX, "");
int64_t offset = (int8_t*)code - (int8_t*)offset_ptr;
assert(offset >= INT_MIN && offset <= INT_MAX);
*offset_ptr = offset;
*size_ptr = code_size;
registerDynamicEhFrame((uint64_t)code.get(), code_size, (uint64_t)eh_frame_addr, size - 4);
registerDynamicEhFrame((uint64_t)code, code_size, (uint64_t)eh_frame_addr, size - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
g.func_addr_registry.registerFunction(("bjit_" + name).str(), code.get(), code_size, NULL);
g.func_addr_registry.registerFunction(("bjit_" + name).str(), code, code_size, NULL);
}
std::unique_ptr<JitFragmentWriter> JitCodeBlock::newFragment(CFGBlock* block, int patch_jump_offset) {
......@@ -566,7 +569,7 @@ int JitFragmentWriter::finishCompilation() {
int bytes_written = assembler->bytesWritten();
// don't retry JITing very large blocks
const auto large_block_threshold = JitCodeBlock::code_size - 4096;
const auto large_block_threshold = code_size - 4096;
if (bytes_written > large_block_threshold) {
static StatCounter num_jit_large_blocks("num_baselinejit_skipped_large_blocks");
num_jit_large_blocks.log();
......
......@@ -132,7 +132,7 @@ class JitFragmentWriter;
class JitCodeBlock {
public:
static constexpr int scratch_size = 256;
static constexpr int code_size = 32768;
static constexpr int memory_size = 32768; // must fit the EH frame + generated code
static constexpr int num_stack_args = 2;
// scratch size + space for passing additional args on the stack without having to adjust the SP when calling
......@@ -140,8 +140,8 @@ public:
static constexpr int sp_adjustment = scratch_size + num_stack_args * 8 + 8 /* = alignment */;
private:
std::unique_ptr<uint8_t[]> code;
std::unique_ptr<uint8_t[]> eh_frame;
// the memory block contains the EH frame directly followed by the generated machine code.
std::unique_ptr<uint8_t[]> memory;
int entry_offset;
assembler::Assembler a;
bool is_currently_writing;
......
......@@ -154,7 +154,12 @@ static const char _eh_frame_template_fp[] =
static constexpr int _eh_frame_template_ofp_size = sizeof(_eh_frame_template_ofp) - 1;
static constexpr int _eh_frame_template_fp_size = sizeof(_eh_frame_template_fp) - 1;
#define EH_FRAME_SIZE (sizeof(_eh_frame_template) - 1) // omit string-terminating null byte
// Size in bytes of the EH frame template that matches the frame-pointer mode
// selected at compile time by RUNTIMEICS_OMIT_FRAME_PTR (both sizes already
// exclude the string-terminating null byte).
// The macro bodies must not end in a semicolon: EH_FRAME_SIZE is used inside
// larger expressions and as a call argument (e.g. `EH_FRAME_SIZE - 4`), where
// a stray ';' would be a syntax error.
#if RUNTIMEICS_OMIT_FRAME_PTR
#define EH_FRAME_SIZE _eh_frame_template_ofp_size
#else
#define EH_FRAME_SIZE _eh_frame_template_fp_size
#endif
static_assert(sizeof("") == 1, "strings are null-terminated");
......@@ -175,35 +180,6 @@ static void writeTrivialEhFrame(void* eh_frame_addr, void* func_addr, uint64_t f
*size_ptr = func_size;
}
// Allocates a separate buffer for an EH frame, fills it in via writeTrivialEhFrame for the
// function located at func_addr (func_size bytes long), and registers the frame.
// Must be called at most once per manager: it asserts that no frame has been allocated yet.
void EHFrameManager::writeAndRegister(void* func_addr, uint64_t func_size) {
assert(eh_frame_addr == NULL);
// Pick the template size that matches the frame-pointer mode this manager was created with.
const int size = omit_frame_pointer ? _eh_frame_template_ofp_size : _eh_frame_template_fp_size;
#ifdef NVALGRIND
// NOTE(review): the malloc result is not checked before it is written to.
eh_frame_addr = malloc(size);
#else
// Map a private anonymous read/write region, with the size rounded up to a multiple of PAGE_SIZE.
eh_frame_addr = mmap(NULL, (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
RELEASE_ASSERT(eh_frame_addr != MAP_FAILED, "");
#endif
writeTrivialEhFrame(eh_frame_addr, func_addr, func_size, omit_frame_pointer);
// (EH_FRAME_SIZE - 4) to omit the 4-byte null terminator, otherwise we trip an assert in parseEhFrame.
// TODO: can we omit the terminator in general?
registerDynamicEhFrame((uint64_t)func_addr, func_size, (uint64_t)eh_frame_addr, size - 4);
// Unlike the call above, this registration is passed the full size, terminator included.
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
}
// Deregisters and releases the EH frame buffer, if one was ever allocated by writeAndRegister.
EHFrameManager::~EHFrameManager() {
// eh_frame_addr stays NULL when writeAndRegister was never called; nothing to clean up then.
if (eh_frame_addr) {
// Recompute the same size that writeAndRegister used for this frame-pointer mode.
const int size = omit_frame_pointer ? _eh_frame_template_ofp_size : _eh_frame_template_fp_size;
deregisterEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, size);
// Release with the counterpart of the allocator chosen in writeAndRegister (malloc vs. mmap).
#ifdef NVALGRIND
free(eh_frame_addr);
#else
// Same page-rounded length that was passed to mmap.
munmap(eh_frame_addr, (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1));
#endif
}
}
#if RUNTIMEICS_OMIT_FRAME_PTR
// If you change this, you *must* update the value in _eh_frame_template
// (set the -9'th byte to this value plus 8)
......@@ -212,7 +188,7 @@ EHFrameManager::~EHFrameManager() {
#define SCRATCH_BYTES 0x30
#endif
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(RUNTIMEICS_OMIT_FRAME_PTR) {
RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) {
static StatCounter sc("runtime_ics_num");
sc.log();
......@@ -254,15 +230,20 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
int patchable_size = num_slots * slot_size;
int total_code_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;
#ifdef NVALGRIND
int total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;
int total_size = total_code_size + EH_FRAME_SIZE;
addr = malloc(total_size);
#else
total_size = PROLOGUE_SIZE + patchable_size + CALL_SIZE + EPILOGUE_SIZE;
total_size = total_code_size + EH_FRAME_SIZE;
addr = mmap(NULL, (total_size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1), PROT_READ | PROT_WRITE | PROT_EXEC,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
RELEASE_ASSERT(addr != MAP_FAILED, "");
#endif
// the memory block contains the EH frame directly followed by the generated machine code.
void* eh_frame_addr = addr;
addr = (char*)addr + EH_FRAME_SIZE;
// printf("Allocated runtime IC at %p\n", addr);
......@@ -308,9 +289,11 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
assert(!epilogue_assem.hasFailed());
assert(epilogue_assem.isExactlyFull());
// TODO: ideally would be more intelligent about allocation strategies.
// The code sections should be together and the eh sections together
eh_frame.writeAndRegister(addr, total_size);
writeTrivialEhFrame(eh_frame_addr, addr, total_code_size, RUNTIMEICS_OMIT_FRAME_PTR);
// (EH_FRAME_SIZE - 4) to omit the 4-byte null terminator, otherwise we trip an assert in parseEhFrame.
// TODO: can we omit the terminator in general?
registerDynamicEhFrame((uint64_t)addr, total_code_size, (uint64_t)eh_frame_addr, EH_FRAME_SIZE - 4);
registerEHFrames((uint8_t*)eh_frame_addr, (uint64_t)eh_frame_addr, EH_FRAME_SIZE);
} else {
addr = func_addr;
}
......@@ -319,10 +302,12 @@ RuntimeIC::RuntimeIC(void* func_addr, int num_slots, int slot_size) : eh_frame(R
RuntimeIC::~RuntimeIC() {
if (ENABLE_RUNTIME_ICS) {
deregisterCompiledPatchpoint(icinfo.get());
uint8_t* eh_frame_addr = (uint8_t*)addr - EH_FRAME_SIZE;
deregisterEHFrames(eh_frame_addr, (uint64_t)eh_frame_addr, EH_FRAME_SIZE);
#ifdef NVALGRIND
free(addr);
free(eh_frame_addr);
#else
munmap(addr, total_size);
munmap(eh_frame_addr, total_size);
#endif
} else {
}
......
......@@ -22,24 +22,12 @@ namespace pyston {
class ICInfo;
// Owns the buffer holding the EH frame for one function: writeAndRegister() allocates and
// registers it, and the destructor deregisters and frees it.
class EHFrameManager {
private:
// Start of the EH frame buffer; NULL until writeAndRegister() has been called.
void* eh_frame_addr;
// Selects which EH frame template (and therefore which size) is used.
bool omit_frame_pointer;
public:
// Creates a manager with no frame allocated yet.
EHFrameManager(bool omit_frame_pointer) : eh_frame_addr(NULL), omit_frame_pointer(omit_frame_pointer) {}
~EHFrameManager();
// Writes and registers the EH frame for the function at func_addr spanning func_size bytes.
void writeAndRegister(void* func_addr, uint64_t func_size);
};
class RuntimeIC {
private:
void* addr;
void* addr; // points to function start not the start of the allocated memory block.
#ifndef NVALGRIND
size_t total_size;
#endif
EHFrameManager eh_frame;
std::unique_ptr<ICInfo> icinfo;
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment