Merge pull request #970 from undingen/cache_optimized

object cache: hash IR before running any opt passes

Merge pull request #970 from undingen/cache_optimized
object cache: hash IR before running any opt passes
758a9b32 · Marius Wachtler · accd1839 · 1e62e500 · 758a9b32 · 758a9b32
Commit 758a9b32 authored Oct 16, 2015 by Marius Wachtler
Showing with 156 additions and 106 deletions

src/codegen/codegen.h src/codegen/codegen.h +3 -0

src/codegen/entry.cpp src/codegen/entry.cpp +109 -104

src/codegen/irgen.cpp src/codegen/irgen.cpp +13 -2

src/codegen/irgen.h src/codegen/irgen.h +31 -0

No files found.
--- a/src/codegen/codegen.h
+++ b/src/codegen/codegen.h
@@ -31,6 +31,8 @@ class TargetMachine;

 namespace pyston {

+class PystonObjectCache;
+
 class FunctionAddressRegistry {
 private:
    struct FuncInfo {
@@ -62,6 +64,7 @@ struct GlobalState {
    CompiledFunction* cur_cf;
    llvm::TargetMachine* tm;
    llvm::ExecutionEngine* engine;
+    PystonObjectCache* object_cache;

    std::vector<llvm::JITEventListener*> jit_listeners;


--- a/src/codegen/entry.cpp
+++ b/src/codegen/entry.cpp
@@ -25,7 +25,6 @@
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/ExecutionEngine/MCJIT.h"
-#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/CommandLine.h"
@@ -39,6 +38,7 @@
 #include "llvm/Transforms/Utils/Cloning.h"

 #include "codegen/codegen.h"
+#include "codegen/irgen.h"
 #include "codegen/memmgr.h"
 #include "codegen/profiling/profiling.h"
 #include "codegen/stackmaps.h"
@@ -202,16 +202,15 @@ public:
    }
 };

-class PystonObjectCache : public llvm::ObjectCache {
-private:
-    // Stream which calculates the SHA256 hash of the data writen to.
-    class HashOStream : public llvm::raw_ostream {
+
+// Stream which calculates the SHA256 hash of the data written to it.
+class HashOStream : public llvm::raw_ostream {
    EVP_MD_CTX* md_ctx;

    void write_impl(const char* ptr, size_t size) override { EVP_DigestUpdate(md_ctx, ptr, size); }
    uint64_t current_pos() const override { return 0; }

-    public:
+public:
    HashOStream() {
        md_ctx = EVP_MD_CTX_create();
        RELEASE_ASSERT(md_ctx, "");
@@ -234,29 +233,23 @@ private:
            stream.write_hex(md_value[i]);
        return stream.str();
    }
-    };
-
-    llvm::SmallString<128> cache_dir;
-    std::string module_identifier;
-    std::string hash_before_codegen;
+};

-public:
-    PystonObjectCache() {
+PystonObjectCache::PystonObjectCache() { // cache_dir = <home>/.cache/pyston/object_cache, then prune old entries
    llvm::sys::path::home_directory(cache_dir);
    llvm::sys::path::append(cache_dir, ".cache");
    llvm::sys::path::append(cache_dir, "pyston");
    llvm::sys::path::append(cache_dir, "object_cache");

    cleanupCacheDirectory();
-    }
-
+}

 #if LLVMREV < 216002
-    virtual void notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj)
+void PystonObjectCache::notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj)
 #else
-    virtual void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
+void PystonObjectCache::notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj)
 #endif
-    {
+{
    RELEASE_ASSERT(module_identifier == M->getModuleIdentifier(), "");
    RELEASE_ASSERT(!hash_before_codegen.empty(), "");

@@ -266,27 +259,22 @@ public:
        return;

    CompressedFile::writeFile(cache_file, Obj.getBuffer());
-    }
+}

 #if LLVMREV < 215566
-    virtual llvm::MemoryBuffer* getObject(const llvm::Module* M)
+llvm::MemoryBuffer* PystonObjectCache::getObject(const llvm::Module* M)
 #else
-    virtual std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M)
+std::unique_ptr<llvm::MemoryBuffer> PystonObjectCache::getObject(const llvm::Module* M)
 #endif
-    {
+{
    static StatCounter jit_objectcache_hits("num_jit_objectcache_hits");
    static StatCounter jit_objectcache_misses("num_jit_objectcache_misses");

    module_identifier = M->getModuleIdentifier();

-        // Generate a hash for the module
-        HashOStream hash_stream;
-        llvm::WriteBitcodeToFile(M, hash_stream);
-        hash_before_codegen = hash_stream.getHash();
+    RELEASE_ASSERT(!hash_before_codegen.empty(), "hash should have already got calculated");

-        llvm::SmallString<128> cache_file = cache_dir;
-        llvm::sys::path::append(cache_file, hash_before_codegen);
-        if (!llvm::sys::fs::exists(cache_file.str())) {
+    if (!haveCacheFileForHash()) {
 #if 0
            // This code helps with identifying why we got a cache miss for a file.
            // - clear the cache directory
@@ -313,6 +301,8 @@ public:
        return NULL;
    }

+    llvm::SmallString<128> cache_file = cache_dir;
+    llvm::sys::path::append(cache_file, hash_before_codegen);
    std::unique_ptr<llvm::MemoryBuffer> mem_buff = CompressedFile::getFile(cache_file);
    if (!mem_buff) {
        jit_objectcache_misses.log();
@@ -321,9 +311,9 @@ public:

    jit_objectcache_hits.log();
    return mem_buff;
-    }
+}

-    void cleanupCacheDirectory() {
+void PystonObjectCache::cleanupCacheDirectory() {
    // Find all files inside the cache directory, if the number of files is larger than
    // MAX_OBJECT_CACHE_ENTRIES,
    // sort them by last modification time and remove the oldest excessive ones.
@@ -347,8 +337,21 @@ public:

    for (int i = 0; i < num_expired; ++i)
        llvm::sys::fs::remove(cache_files[i].first);
-    }
-};
+}
+
+void PystonObjectCache::calculateModuleHash(const llvm::Module* M, EffortLevel effort) { // (re)computes the cache key
+    HashOStream hash_stream;
+    llvm::WriteBitcodeToFile(M, hash_stream); // the module's serialized bitcode is fed into the digest
+    hash_stream << (int)effort; // the effort level is part of the hash input as well
+    hash_before_codegen = hash_stream.getHash(); // kept for haveCacheFileForHash() and getObject()
+}
+
+bool PystonObjectCache::haveCacheFileForHash() { // true if <cache_dir>/<hash_before_codegen> exists
+    llvm::SmallString<128> cache_file = cache_dir;
+    llvm::sys::path::append(cache_file, hash_before_codegen); // the hash string is used as the file name
+    return llvm::sys::fs::exists(cache_file.str());
+}
+

 static void handle_sigusr1(int signum) {
    assert(signum == SIGUSR1);
@@ -425,8 +428,10 @@ void initCodegen() {
    g.engine = eb.create(g.tm);
    assert(g.engine && "engine creation failed?");

-    if (ENABLE_JIT_OBJECT_CACHE)
-        g.engine->setObjectCache(new PystonObjectCache());
+    if (ENABLE_JIT_OBJECT_CACHE) {
+        g.object_cache = new PystonObjectCache;
+        g.engine->setObjectCache(g.object_cache);
+    }

    g.i1 = llvm::Type::getInt1Ty(g.context);
    g.i8 = llvm::Type::getInt8Ty(g.context);

--- a/src/codegen/irgen.cpp
+++ b/src/codegen/irgen.cpp
@@ -20,6 +20,7 @@
 #include <stdint.h>

 #include "llvm/Analysis/Passes.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Verifier.h"
@@ -1120,8 +1121,18 @@ CompiledFunction* doCompile(CLFunction* clfunc, SourceInfo* source, ParamNames*
    static StatCounter us_irgen("us_compiling_irgen");
    us_irgen.log(irgen_us);

+
+    // Calculate the module hash before doing any optimizations.
+    // This has the advantage that we can skip running the opt passes when we have a cached object file
+    // but the disadvantage that optimizations are not allowed to add new symbolic constants...
+    if (ENABLE_JIT_OBJECT_CACHE) {
+        g.object_cache->calculateModuleHash(g.cur_module, effort);
+        if (ENABLE_LLVMOPTS && !g.object_cache->haveCacheFileForHash())
+            optimizeIR(f, effort);
+    } else {
        if (ENABLE_LLVMOPTS)
            optimizeIR(f, effort);
+    }

    g.cur_module = NULL;


--- a/src/codegen/irgen.h
+++ b/src/codegen/irgen.h
@@ -15,6 +15,8 @@
 #ifndef PYSTON_CODEGEN_IRGEN_H
 #define PYSTON_CODEGEN_IRGEN_H

+#include "llvm/ADT/SmallString.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/Intrinsics.h"
@@ -150,6 +152,35 @@ public:

    ExceptionStyle preferredExceptionStyle() const { return unw_info.preferredExceptionStyle(); }
 };
+
+
+class PystonObjectCache : public llvm::ObjectCache { // on-disk cache of compiled objects, keyed by an IR hash
+private:
+    llvm::SmallString<128> cache_dir; // directory the cache files live in
+    std::string module_identifier; // identifier of the module last passed to getObject()
+    std::string hash_before_codegen; // set by calculateModuleHash(); also the cache file name
+
+public:
+    PystonObjectCache(); // sets up cache_dir and calls cleanupCacheDirectory()
+
+
+#if LLVMREV < 216002
+    virtual void notifyObjectCompiled(const llvm::Module* M, const llvm::MemoryBuffer* Obj); // writes Obj to cache
+#else
+    virtual void notifyObjectCompiled(const llvm::Module* M, llvm::MemoryBufferRef Obj); // writes Obj to cache
+#endif
+
+#if LLVMREV < 215566
+    virtual llvm::MemoryBuffer* getObject(const llvm::Module* M); // NULL on a cache miss
+#else
+    virtual std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module* M); // NULL on a cache miss
+#endif
+
+    void cleanupCacheDirectory(); // removes the oldest files beyond MAX_OBJECT_CACHE_ENTRIES
+
+    void calculateModuleHash(const llvm::Module* M, EffortLevel effort); // stores hash of bitcode + effort
+    bool haveCacheFileForHash(); // does a cache file for the current hash exist?
+};
 }

 #endif