Index: include/llvm/LTO/Caching.h
===================================================================
--- /dev/null
+++ include/llvm/LTO/Caching.h
@@ -0,0 +1,64 @@
+//===- Caching.h - LLVM Link Time Optimizer Cache Handling ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the lto::CacheObjectOutput data structure, which allows
+// clients to add a filesystem cache to ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_CACHING_H
+#define LLVM_LTO_CACHING_H
+
+#include "llvm/LTO/Config.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/MemoryBuffer.h"
+
+namespace llvm {
+namespace lto {
+/// Type for client-supplied callback when a buffer is loaded from the cache.
+typedef std::function<void(std::unique_ptr<MemoryBuffer>)> AddBufferFn;
+
+/// Manage caching on the filesystem.
+class CacheObjectOutput : public NativeObjectOutput {
+  /// Path to the on-disk cache directory. This is a non-owning reference: the
+  /// storage it points to must outlive this object.
+  StringRef CacheDirectoryPath;
+  SmallString<128> EntryPath;    ///< Path to this entry in the cache.
+  SmallString<128> TempFilename; ///< Path to an optional temporary file.
+  /// User supplied callback.
+  AddBufferFn AddBuffer;
+
+public:
+  /// Create a CacheObjectOutput: the client is supposed to create it in the
+  /// callback supplied to LTO::run. The \p CacheDirectoryPath points to the
+  /// directory on disk where to store the cache, and \p AddBuffer will be
+  /// called when the buffer is pulled out of the cache (potentially after
+  /// creating it).
+  CacheObjectOutput(StringRef CacheDirectoryPath, AddBufferFn AddBuffer)
+      : CacheDirectoryPath(CacheDirectoryPath),
+        AddBuffer(std::move(AddBuffer)) {}
+
+  /// Commit the temporary file to its cache entry and hand the reloaded entry
+  /// to the AddBuffer callback (implemented in Caching.cpp).
+  ~CacheObjectOutput();
+
+  /// Return an allocated stream for the output, or null in case of failure.
+  std::unique_ptr<raw_pwrite_stream> getStream() override;
+
+  /// Try loading from a possible cache first, return true on cache hit.
+  bool tryLoadFromCache(StringRef Key) override;
+
+  /// Returns true if a cache is available.
+  bool isCachingEnabled() const override { return !CacheDirectoryPath.empty(); }
+};
+
+} // namespace lto
+} // namespace llvm
+
+#endif
Index: include/llvm/LTO/Config.h
===================================================================
--- include/llvm/LTO/Config.h
+++ include/llvm/LTO/Config.h
@@ -36,6 +36,13 @@
 public:
   // Return an allocated stream for the output, or null in case of failure.
   virtual std::unique_ptr<raw_pwrite_stream> getStream() = 0;
+
+  // Try loading from a possible cache first, return true on cache hit.
+  virtual bool tryLoadFromCache(StringRef Key) { return false; }
+
+  // Returns true if a cache is available
+  virtual bool isCachingEnabled() const { return false; }
+
   virtual ~NativeObjectOutput() = default;
 };
 
Index: lib/LTO/CMakeLists.txt
===================================================================
--- lib/LTO/CMakeLists.txt
+++ lib/LTO/CMakeLists.txt
@@ -48,6 +48,7 @@
 
 
 add_llvm_library(LLVMLTO
+  Caching.cpp
   LTO.cpp
   LTOBackend.cpp
   LTOModule.cpp
Index: lib/LTO/Caching.cpp
===================================================================
--- /dev/null
+++ lib/LTO/Caching.cpp
@@ -0,0 +1,75 @@
+//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Caching for ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/Caching.h"
+
+#ifdef HAVE_LLVM_REVISION
+#include "LLVMLTORevision.h"
+#endif
+
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+
+using namespace llvm;
+using namespace llvm::lto;
+
+CacheObjectOutput::~CacheObjectOutput() {
+  // No key was ever submitted, so there is no entry to hand back.
+  if (EntryPath.empty())
+    return;
+  // A temp file only exists on a cache miss (getStream()); commit it now.
+  if (!TempFilename.empty() && sys::fs::rename(TempFilename, EntryPath)) {
+    // Renaming failed, probably not the same filesystem, copy and delete.
+    auto TempBufferOrErr = MemoryBuffer::getFile(TempFilename);
+    if (auto EC = TempBufferOrErr.getError())
+      report_fatal_error(Twine("Failed to open temp file '") + TempFilename +
+                         "': " + EC.message() + "\n");
+    std::error_code EC;
+    raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
+    if (EC)
+      report_fatal_error(Twine("Failed to open ") + EntryPath +
+                         " to save cached entry\n");
+    // NOTE: two processes writing the same entry are not synchronized.
+    OS << (*TempBufferOrErr)->getBuffer();
+    sys::fs::remove(TempFilename);
+  }
+  auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
+  if (auto EC = ReloadedBufferOrErr.getError())
+    report_fatal_error(Twine("Can't reload cached file '") + EntryPath +
+                       "': " + EC.message() + "\n");
+  AddBuffer(std::move(*ReloadedBufferOrErr));
+}
+
+// Return an allocated stream for the output, or null in case of failure.
+std::unique_ptr<raw_pwrite_stream> CacheObjectOutput::getStream() {
+  if (EntryPath.empty())
+    return nullptr;
+  // Write to a temporary to avoid race condition
+  int TempFD;
+  std::error_code EC =
+      sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
+  if (EC) {
+    errs() << "Error: " << EC.message() << "\n";
+    report_fatal_error("ThinLTO: Can't get a temporary file");
+  }
+  return llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true);
+}
+
+// Try loading from a possible cache first, return true on cache hit.
+bool CacheObjectOutput::tryLoadFromCache(StringRef Key) {
+  sys::path::append(EntryPath, CacheDirectoryPath, Key);
+  return sys::fs::exists(EntryPath);
+}
Index: lib/LTO/LTO.cpp
===================================================================
--- lib/LTO/LTO.cpp
+++ lib/LTO/LTO.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/ThreadPool.h"
@@ -41,6 +42,59 @@
 using namespace lto;
 using namespace object;
 
+#define DEBUG_TYPE "lto"
+
+// Returns a unique hash for the Module considering the current list of
+// export/import. The hash is produced in \p Key.
+static void computeCacheKey(
+    SmallString<40> &Key, const ModuleSummaryIndex &Index, StringRef ModuleID,
+    const FunctionImporter::ImportMapTy &ImportList,
+    const FunctionImporter::ExportSetTy &ExportList,
+    const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+    const GVSummaryMapTy &DefinedGlobals) {
+  // Compute the unique hash for this entry
+  // This is based on the current compiler version, the module itself, the
+  // export list, the hash for every single module in the import list, the
+  // list of ResolvedODR for the module, and the list of preserved symbols.
+  SHA1 Hasher;
+
+  // Start with the compiler revision
+  Hasher.update(LLVM_VERSION_STRING);
+#ifdef HAVE_LLVM_REVISION
+  Hasher.update(LLVM_REVISION);
+#endif
+
+  // Include the hash for the current module
+  auto ModHash = Index.getModuleHash(ModuleID);
+  Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+  for (auto F : ExportList)
+    // The export list can impact the internalization, be conservative here
+    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
+
+  // Include the hash for every module we import functions from
+  for (auto &Entry : ImportList) {
+    auto ModHash = Index.getModuleHash(Entry.first());
+    Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+  }
+
+  // Include the hash for the resolved ODR.
+  for (auto &Entry : ResolvedODR) {
+    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
+                                    sizeof(GlobalValue::GUID)));
+    Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
+                                    sizeof(GlobalValue::LinkageTypes)));
+  }
+
+  // Include the hash for the preserved symbols.
+  for (auto &GS : DefinedGlobals) {
+    GlobalValue::LinkageTypes Linkage = GS.second->linkage();
+    Hasher.update(
+        ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
+  }
+
+  Key = toHex(Hasher.result());
+}
+
 // Simple helper to load a module from bitcode
 std::unique_ptr<Module>
 llvm::loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
@@ -408,9 +462,12 @@
         ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
 
   virtual ~ThinBackendProc() {}
-  virtual Error start(unsigned Task, MemoryBufferRef MBRef,
-                      const FunctionImporter::ImportMapTy &ImportList,
-                      MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
+  virtual Error start(
+      unsigned Task, MemoryBufferRef MBRef,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      MapVector<StringRef, MemoryBufferRef> &ModuleMap) = 0;
   virtual Error wait() = 0;
 };
 
@@ -429,35 +486,58 @@
                        ModuleToDefinedGVSummaries),
         BackendThreadPool(ThinLTOParallelismLevel) {}
 
-  Error
-  runThinLTOBackendThread(AddOutputFn AddOutput, unsigned Task,
-                          MemoryBufferRef MBRef,
-                          ModuleSummaryIndex &CombinedIndex,
-                          const FunctionImporter::ImportMapTy &ImportList,
-                          const GVSummaryMapTy &DefinedGlobals,
-                          MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
-    LLVMContext BackendContext;
+  Error runThinLTOBackendThread(
+      AddOutputFn AddOutput, unsigned Task, MemoryBufferRef MBRef,
+      ModuleSummaryIndex &CombinedIndex,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      const GVSummaryMapTy &DefinedGlobals,
+      MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
+
+    auto ModuleIdentifier = MBRef.getBufferIdentifier();
+    auto Output = AddOutput(Task);
+    // Only compute the key when the client's output can actually use it.
+    if (Output->isCachingEnabled()) {
+      SmallString<40> Key;
+      // The module may be cached, this helps handling it.
+      computeCacheKey(Key, CombinedIndex, ModuleIdentifier, ImportList,
+                      ExportList, ResolvedODR, DefinedGlobals);
+      if (Output->tryLoadFromCache(Key))
+        return Error();
+    }
+    LLVMContext BackendContext;
 
     ErrorOr<std::unique_ptr<Module>> MOrErr =
         parseBitcodeFile(MBRef, BackendContext);
     assert(MOrErr && "Unable to load module in thread?");
 
-    return thinBackend(Conf, Task, AddOutput, **MOrErr, CombinedIndex,
+    auto AddOutputWrapper = [&](unsigned TaskId) {
+      assert(Task == TaskId && "Unexpected TaskId mismatch");
+      return std::move(Output);
+    };
+    return thinBackend(Conf, Task, AddOutputWrapper, **MOrErr, CombinedIndex,
                        ImportList, DefinedGlobals, ModuleMap);
   }
 
-  Error start(unsigned Task, MemoryBufferRef MBRef,
-              const FunctionImporter::ImportMapTy &ImportList,
-              MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
+  Error start(
+      unsigned Task, MemoryBufferRef MBRef,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
     StringRef ModulePath = MBRef.getBufferIdentifier();
     BackendThreadPool.async(
         [=](MemoryBufferRef MBRef, ModuleSummaryIndex &CombinedIndex,
             const FunctionImporter::ImportMapTy &ImportList,
+            const FunctionImporter::ExportSetTy &ExportList,
+            const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+                &ResolvedODR,
             GVSummaryMapTy &DefinedGlobals,
             MapVector<StringRef, MemoryBufferRef> &ModuleMap) {
-          Error E =
-              runThinLTOBackendThread(AddOutput, Task, MBRef, CombinedIndex,
-                                      ImportList, DefinedGlobals, ModuleMap);
+          Error E = runThinLTOBackendThread(
+              AddOutput, Task, MBRef, CombinedIndex, ImportList, ExportList,
+              ResolvedODR, DefinedGlobals, ModuleMap);
           if (E) {
             std::unique_lock<std::mutex> L(ErrMu);
             if (Err)
@@ -467,6 +547,7 @@
           }
         },
         MBRef, std::ref(CombinedIndex), std::ref(ImportList),
+        std::ref(ExportList), std::ref(ResolvedODR),
         std::ref(ModuleToDefinedGVSummaries[ModulePath]), std::ref(ModuleMap));
     return Error();
   }
@@ -529,9 +610,12 @@
     return NewPath.str();
   }
 
-  Error start(unsigned Task, MemoryBufferRef MBRef,
-              const FunctionImporter::ImportMapTy &ImportList,
-              MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
+  Error start(
+      unsigned Task, MemoryBufferRef MBRef,
+      const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+      MapVector<StringRef, MemoryBufferRef> &ModuleMap) override {
     StringRef ModulePath = MBRef.getBufferIdentifier();
     std::string NewModulePath =
         getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
@@ -617,9 +701,16 @@
            ExportedGUIDs.count(GUID);
   };
   thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
-  thinLTOResolveWeakForLinkerInIndex(
-      ThinLTO.CombinedIndex, isPrevailing,
-      [](StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes) {});
+
+  StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+  auto recordNewLinkage = [&](StringRef ModuleIdentifier,
+                              GlobalValue::GUID GUID,
+                              GlobalValue::LinkageTypes NewLinkage) {
+    ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
+  };
+
+  thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
+                                     recordNewLinkage);
 
   std::unique_ptr<ThinBackendProc> BackendProc = ThinLTO.Backend(
       Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries, AddOutput);
@@ -634,7 +725,8 @@
 
   for (auto &Mod : ThinLTO.ModuleMap) {
     if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
-                                     ThinLTO.ModuleMap))
+                                     ExportLists[Mod.first],
+                                     ResolvedODR[Mod.first], ThinLTO.ModuleMap))
       return E;
 
     ++Task;
Index: test/ThinLTO/X86/cache.ll
===================================================================
--- test/ThinLTO/X86/cache.ll
+++ test/ThinLTO/X86/cache.ll
@@ -1,5 +1,5 @@
 ; RUN: opt -module-summary %s -o %t.bc
-; RUN: opt -module-summary %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: opt -module-summary %p/Inputs/cache.ll -o %t2.bc
 
 ; Verify that enabling caching is working
 ; RUN: rm -Rf %t.cache && mkdir %t.cache
@@ -7,6 +7,14 @@
 ; RUN: ls %t.cache/llvmcache.timestamp
 ; RUN: ls %t.cache | count 3
 
+; Verify that enabling caching is working with llvm-lto2
+; RUN: rm -Rf %t.cache && mkdir %t.cache
+; RUN: llvm-lto2 -o %t.o %t2.bc %t.bc -cache-dir %t.cache \
+; RUN:  -r=%t2.bc,_main,plx \
+; RUN:  -r=%t2.bc,_globalfunc,lx \
+; RUN:  -r=%t.bc,_globalfunc,plx
+; RUN: ls %t.cache | count 2
+
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.11.0"
 
Index: tools/llvm-lto2/llvm-lto2.cpp
===================================================================
--- tools/llvm-lto2/llvm-lto2.cpp
+++ tools/llvm-lto2/llvm-lto2.cpp
@@ -16,6 +16,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/LTO/Caching.h"
 #include "llvm/LTO/LTO.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/TargetSelect.h"
@@ -31,6 +32,9 @@
                                            cl::desc("Output filename"),
                                            cl::value_desc("filename"));
 
+static cl::opt<std::string> CacheDir("cache-dir", cl::desc("Cache Directory"),
+                                     cl::value_desc("directory"));
+
 static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files"));
 
 static cl::list<std::string> SymbolResolutions(
@@ -172,9 +176,17 @@
   if (HasErrors)
     return 1;
 
-  auto AddOutput = [&](size_t Task) {
+  auto AddOutput =
+      [&](size_t Task) -> std::unique_ptr<lto::NativeObjectOutput> {
     std::string Path = OutputFilename + "." + utostr(Task);
-    return llvm::make_unique<LTOOutput>(Path);
+    if (CacheDir.empty())
+      return llvm::make_unique<LTOOutput>(Path);
+
+    // Capture Path by value: the callback runs after this lambda has returned.
+    return llvm::make_unique<CacheObjectOutput>(
+        CacheDir, [Path](std::unique_ptr<MemoryBuffer> Buffer) {
+          *LTOOutput(Path).getStream() << Buffer->getBuffer();
+        });
   };
 
   check(Lto.run(AddOutput), "LTO::run failed");