Commit 758a9b32 authored by Marius Wachtler

Merge pull request #970 from undingen/cache_optimized

object cache: hash IR before running any opt passes
parents accd1839 1e62e500
...@@ -31,6 +31,8 @@ class TargetMachine; ...@@ -31,6 +31,8 @@ class TargetMachine;
namespace pyston { namespace pyston {
class PystonObjectCache;
class FunctionAddressRegistry { class FunctionAddressRegistry {
private: private:
struct FuncInfo { struct FuncInfo {
...@@ -62,6 +64,7 @@ struct GlobalState { ...@@ -62,6 +64,7 @@ struct GlobalState {
CompiledFunction* cur_cf; CompiledFunction* cur_cf;
llvm::TargetMachine* tm; llvm::TargetMachine* tm;
llvm::ExecutionEngine* engine; llvm::ExecutionEngine* engine;
PystonObjectCache* object_cache;
std::vector<llvm::JITEventListener*> jit_listeners; std::vector<llvm::JITEventListener*> jit_listeners;
......
...@@ -25,7 +25,6 @@ ...@@ -25,7 +25,6 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/MCJIT.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h" #include "llvm/Support/CommandLine.h"
...@@ -39,6 +38,7 @@ ...@@ -39,6 +38,7 @@
#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Cloning.h"
#include "codegen/codegen.h" #include "codegen/codegen.h"
#include "codegen/irgen.h"
#include "codegen/memmgr.h" #include "codegen/memmgr.h"
#include "codegen/profiling/profiling.h" #include "codegen/profiling/profiling.h"
#include "codegen/stackmaps.h" #include "codegen/stackmaps.h"
...@@ -202,16 +202,15 @@ public: ...@@ -202,16 +202,15 @@ public:
} }
}; };
class PystonObjectCache : public llvm::ObjectCache {
private: // Stream which calculates the SHA256 hash of the data written to.
// Stream which calculates the SHA256 hash of the data written to. class HashOStream : public llvm::raw_ostream {
class HashOStream : public llvm::raw_ostream {
EVP_MD_CTX* md_ctx; EVP_MD_CTX* md_ctx;
void write_impl(const char* ptr, size_t size) override { EVP_DigestUpdate(md_ctx, ptr, size); } void write_impl(const char* ptr, size_t size) override { EVP_DigestUpdate(md_ctx, ptr, size); }
uint64_t current_pos() const override { return 0; } uint64_t current_pos() const override { return 0; }
public: public:
HashOStream() { HashOStream() {
md_ctx = EVP_MD_CTX_create(); md_ctx = EVP_MD_CTX_create();
RELEASE_ASSERT(md_ctx, ""); RELEASE_ASSERT(md_ctx, "");
...@@ -234,29 +233,23 @@ private: ...@@ -234,29 +233,23 @@ private:
stream.write_hex(md_value[i]); stream.write_hex(md_value[i]);
return stream.str(); return stream.str();
} }
}; };
llvm::SmallString<128> cache_dir;
std::string module_identifier;
std::string hash_before_codegen;
public: PystonObjectCache::PystonObjectCache() {
PystonObjectCache() {
llvm::sys::path::home_directory(cache_dir); llvm::sys::path::home_directory(cache_dir);
llvm::sys::path::append(cache_dir, ".cache"); llvm::sys::path::append(cache_dir, ".cache");
llvm::sys::path::append(cache_dir, "pyston"); llvm::sys::path::append(cache_dir, "pyston");
llvm::sys::path::append(cache_dir, "object_cache"); llvm::sys::path::append(cache_dir, "object_cache");
cleanupCacheDirectory(); cleanupCacheDirectory();
} }
#if LLVMREV < 216002 #if LLVMREV < 216002
virtual void notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj) void PystonObjectCache::notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj)
#else #else
virtual void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj) void PystonObjectCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
#endif #endif
{ {
RELEASE_ASSERT(module_identifier == M->getModuleIdentifier(), ""); RELEASE_ASSERT(module_identifier == M->getModuleIdentifier(), "");
RELEASE_ASSERT(!hash_before_codegen.empty(), ""); RELEASE_ASSERT(!hash_before_codegen.empty(), "");
...@@ -266,27 +259,22 @@ public: ...@@ -266,27 +259,22 @@ public:
return; return;
CompressedFile::writeFile(cache_file, Obj.getBuffer()); CompressedFile::writeFile(cache_file, Obj.getBuffer());
} }
#if LLVMREV < 215566 #if LLVMREV < 215566
virtual llvm::MemoryBuffer* getObject(const llvm::Module* M) llvm::MemoryBuffer* PystonObjectCache::getObject(const llvm::Module* M)
#else #else
virtual std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M) std::unique_ptr<llvm::MemoryBuffer> PystonObjectCache::getObject(const llvm::Module* M)
#endif #endif
{ {
static StatCounter jit_objectcache_hits("num_jit_objectcache_hits"); static StatCounter jit_objectcache_hits("num_jit_objectcache_hits");
static StatCounter jit_objectcache_misses("num_jit_objectcache_misses"); static StatCounter jit_objectcache_misses("num_jit_objectcache_misses");
module_identifier = M->getModuleIdentifier(); module_identifier = M->getModuleIdentifier();
// Generate a hash for the module RELEASE_ASSERT(!hash_before_codegen.empty(), "hash should have already got calculated");
HashOStream hash_stream;
llvm::WriteBitcodeToFile(M, hash_stream);
hash_before_codegen = hash_stream.getHash();
llvm::SmallString<128> cache_file = cache_dir; if (!haveCacheFileForHash()) {
llvm::sys::path::append(cache_file, hash_before_codegen);
if (!llvm::sys::fs::exists(cache_file.str())) {
#if 0 #if 0
// This code helps with identifying why we got a cache miss for a file. // This code helps with identifying why we got a cache miss for a file.
// - clear the cache directory // - clear the cache directory
...@@ -313,6 +301,8 @@ public: ...@@ -313,6 +301,8 @@ public:
return NULL; return NULL;
} }
llvm::SmallString<128> cache_file = cache_dir;
llvm::sys::path::append(cache_file, hash_before_codegen);
std::unique_ptr<llvm::MemoryBuffer> mem_buff = CompressedFile::getFile(cache_file); std::unique_ptr<llvm::MemoryBuffer> mem_buff = CompressedFile::getFile(cache_file);
if (!mem_buff) { if (!mem_buff) {
jit_objectcache_misses.log(); jit_objectcache_misses.log();
...@@ -321,9 +311,9 @@ public: ...@@ -321,9 +311,9 @@ public:
jit_objectcache_hits.log(); jit_objectcache_hits.log();
return mem_buff; return mem_buff;
} }
void cleanupCacheDirectory() { void PystonObjectCache::cleanupCacheDirectory() {
// Find all files inside the cache directory, if the number of files is larger than // Find all files inside the cache directory, if the number of files is larger than
// MAX_OBJECT_CACHE_ENTRIES, // MAX_OBJECT_CACHE_ENTRIES,
// sort them by last modification time and remove the oldest excessive ones. // sort them by last modification time and remove the oldest excessive ones.
...@@ -347,8 +337,21 @@ public: ...@@ -347,8 +337,21 @@ public:
for (int i = 0; i < num_expired; ++i) for (int i = 0; i < num_expired; ++i)
llvm::sys::fs::remove(cache_files[i].first); llvm::sys::fs::remove(cache_files[i].first);
} }
};
// Computes the hash that identifies module M for the object cache and stores it in
// hash_before_codegen.
//
// The hashed data is the module's bitcode followed by the effort level, so the effort
// level is part of the cache key as well as the IR itself.
void PystonObjectCache::calculateModuleHash(const llvm::Module* M, EffortLevel effort) {
    HashOStream digest_stream;

    // Feed the serialized module into the hashing stream.
    llvm::WriteBitcodeToFile(M, digest_stream);

    // Mix in the effort level as its integer value.
    const int effort_value = static_cast<int>(effort);
    digest_stream << effort_value;

    hash_before_codegen = digest_stream.getHash();
}
// Reports whether the cache directory contains an entry named after the current
// hash_before_codegen.
//
// Returns false when no hash has been calculated yet: appending an empty name would
// leave the path pointing at the cache directory itself, which exists and would be
// misreported as a cached object file.
bool PystonObjectCache::haveCacheFileForHash() {
    if (hash_before_codegen.empty())
        return false;

    llvm::SmallString<128> cache_file = cache_dir;
    llvm::sys::path::append(cache_file, hash_before_codegen);
    return llvm::sys::fs::exists(cache_file.str());
}
static void handle_sigusr1(int signum) { static void handle_sigusr1(int signum) {
assert(signum == SIGUSR1); assert(signum == SIGUSR1);
...@@ -425,8 +428,10 @@ void initCodegen() { ...@@ -425,8 +428,10 @@ void initCodegen() {
g.engine = eb.create(g.tm); g.engine = eb.create(g.tm);
assert(g.engine && "engine creation failed?"); assert(g.engine && "engine creation failed?");
if (ENABLE_JIT_OBJECT_CACHE) if (ENABLE_JIT_OBJECT_CACHE) {
g.engine->setObjectCache(new PystonObjectCache()); g.object_cache = new PystonObjectCache;
g.engine->setObjectCache(g.object_cache);
}
g.i1 = llvm::Type::getInt1Ty(g.context); g.i1 = llvm::Type::getInt1Ty(g.context);
g.i8 = llvm::Type::getInt8Ty(g.context); g.i8 = llvm::Type::getInt8Ty(g.context);
......
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <stdint.h> #include <stdint.h>
#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Passes.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/IR/DIBuilder.h" #include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
...@@ -1120,8 +1121,18 @@ CompiledFunction* doCompile(CLFunction* clfunc, SourceInfo* source, ParamNames* ...@@ -1120,8 +1121,18 @@ CompiledFunction* doCompile(CLFunction* clfunc, SourceInfo* source, ParamNames*
static StatCounter us_irgen("us_compiling_irgen"); static StatCounter us_irgen("us_compiling_irgen");
us_irgen.log(irgen_us); us_irgen.log(irgen_us);
// Calculate the module hash before doing any optimizations.
// This has the advantage that we can skip running the opt passes when we have a cached object file
// but the disadvantage that optimizations are not allowed to add new symbolic constants...
if (ENABLE_JIT_OBJECT_CACHE) {
g.object_cache->calculateModuleHash(g.cur_module, effort);
if (ENABLE_LLVMOPTS && !g.object_cache->haveCacheFileForHash())
optimizeIR(f, effort);
} else {
if (ENABLE_LLVMOPTS) if (ENABLE_LLVMOPTS)
optimizeIR(f, effort); optimizeIR(f, effort);
}
g.cur_module = NULL; g.cur_module = NULL;
......
...@@ -15,6 +15,8 @@ ...@@ -15,6 +15,8 @@
#ifndef PYSTON_CODEGEN_IRGEN_H #ifndef PYSTON_CODEGEN_IRGEN_H
#define PYSTON_CODEGEN_IRGEN_H #define PYSTON_CODEGEN_IRGEN_H
#include "llvm/ADT/SmallString.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/CallSite.h" #include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h" #include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Intrinsics.h"
...@@ -150,6 +152,35 @@ public: ...@@ -150,6 +152,35 @@ public:
ExceptionStyle preferredExceptionStyle() const { return unw_info.preferredExceptionStyle(); } ExceptionStyle preferredExceptionStyle() const { return unw_info.preferredExceptionStyle(); }
}; };
// Object cache installed on the LLVM execution engine. Compiled object files are stored
// in a cache directory under a file name equal to the hash of the module they were built from.
class PystonObjectCache : public llvm::ObjectCache {
private:
// Directory that holds the cached object files.
llvm::SmallString<128> cache_dir;
// Identifier of the module most recently passed to getObject(); notifyObjectCompiled()
// asserts that it is called for the same module.
std::string module_identifier;
// Hash set by calculateModuleHash(); used as the cache file name.
std::string hash_before_codegen;
public:
// Sets cache_dir and runs cleanupCacheDirectory().
PystonObjectCache();
// Called once a module has been compiled; stores the resulting object buffer in the cache.
// Requires that hash_before_codegen has been calculated. The parameter type of Obj depends
// on the LLVM revision being built against.
#if LLVMREV < 216002
virtual void notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj);
#else
virtual void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj);
#endif
// Returns the cached object file for the current hash, or NULL when there is none.
// Requires that hash_before_codegen has been calculated. The return type depends on the
// LLVM revision being built against.
#if LLVMREV < 215566
virtual llvm::MemoryBuffer* getObject(const llvm::Module* M);
#else
virtual std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M);
#endif
// Removes the oldest cache files when the cache directory holds too many entries.
void cleanupCacheDirectory();
// Hashes the bitcode of M together with the effort level and stores the result in
// hash_before_codegen.
void calculateModuleHash(const llvm::Module* M, EffortLevel effort);
// Returns whether a file named after hash_before_codegen exists in cache_dir.
bool haveCacheFileForHash();
};
} }
#endif #endif
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment