diff --git a/llvm/examples/CMakeLists.txt b/llvm/examples/CMakeLists.txt
--- a/llvm/examples/CMakeLists.txt
+++ b/llvm/examples/CMakeLists.txt
@@ -8,6 +8,7 @@
 add_subdirectory(ModuleMaker)
 add_subdirectory(SpeculativeJIT)
 add_subdirectory(Bye)
+add_subdirectory(ThinLtoJIT)
 
 if(LLVM_ENABLE_EH AND (NOT WIN32) AND (NOT "${LLVM_NATIVE_ARCH}" STREQUAL "ARM"))
     add_subdirectory(ExceptionDemo)
diff --git a/llvm/examples/ThinLtoJIT/CMakeLists.txt b/llvm/examples/ThinLtoJIT/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/CMakeLists.txt
@@ -0,0 +1,18 @@
+set(LLVM_LINK_COMPONENTS
+  Core
+  IRReader
+  OrcJIT
+  ExecutionEngine
+  Support
+  nativecodegen
+  Analysis
+  Passes
+  )
+
+add_llvm_example(ThinLtoJIT
+  main.cpp
+  ThinLtoJIT.cpp
+  ThinLtoModuleIndex.cpp
+  ThinLtoInstrumentationLayer.cpp
+  ThinLtoDiscoveryThread.cpp
+  )
diff --git a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.h
@@ -0,0 +1,65 @@
+#ifndef LLVM_EXAMPLES_THINLTOJIT_DISCOVERYTHREAD_H
+#define LLVM_EXAMPLES_THINLTOJIT_DISCOVERYTHREAD_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+
+#include "ThinLtoJIT.h"
+
+#include <atomic>
+#include <string>
+
+namespace llvm {
+namespace orc {
+
+class ExecutionSession;
+class ThinLtoModuleIndex;
+class ThinLtoInstrumentationLayer;
+
+/// Worker that runs on the discovery thread. It polls the instrumentation
+/// layer for fired flags, discovers the modules of upcoming callees and
+/// submits them to the JIT until the running flag gets cleared.
+class ThinLtoDiscoveryThread {
+public:
+  ThinLtoDiscoveryThread(std::atomic<bool> &RunningFlag, ExecutionSession &ES,
+                         JITDylib *MainJD, ThinLtoInstrumentationLayer &L,
+                         ThinLtoModuleIndex &GlobalIndex,
+                         ThinLtoJIT::AddModuleFunction AddModule,
+                         unsigned LookaheadLevels, unsigned NumLoadThreads,
+                         MangleAndInterner &Mangle, bool PrintStats)
+      : KeepRunning(RunningFlag), ES(ES), Layer(L), GlobalIndex(GlobalIndex),
+        AddModule(std::move(AddModule)), LookaheadLevels(LookaheadLevels),
+        NumLoadThreads(NumLoadThreads), Mangle(Mangle),
+        PrintStats(PrintStats) {}
+
+  ~ThinLtoDiscoveryThread() {
+    if (PrintStats)
+      dump(errs());
+  }
+
+  void operator()();
+
+  void dump(raw_ostream &OS) {
+    OS << format("Modules submitted asynchronously: %d\n", NumModulesSubmitted);
+  }
+
+private:
+  std::atomic<bool> &KeepRunning;
+  ExecutionSession &ES;
+  ThinLtoInstrumentationLayer &Layer;
+  ThinLtoModuleIndex &GlobalIndex;
+  ThinLtoJIT::AddModuleFunction AddModule;
+  unsigned LookaheadLevels;
+  unsigned NumLoadThreads;
+  MangleAndInterner &Mangle;
+  bool PrintStats;
+  unsigned NumModulesSubmitted{0};
+
+  void spawnLookupForHighRankModules();
+  SymbolStringPtr makeDummySymbolName(const std::string &Path) const;
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif
diff --git a/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/ThinLtoDiscoveryThread.cpp
@@ -0,0 +1,77 @@
+#include "ThinLtoDiscoveryThread.h"
+
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+
+#include "ThinLtoInstrumentationLayer.h"
+#include "ThinLtoModuleIndex.h"
+
+#include <thread>
+
+#define DEBUG_TYPE "thinltojit"
+
+namespace llvm {
+namespace orc {
+
+// Thread entry point: poll for fired discovery flags until the running flag is
+// cleared. For each reached function, discover the modules of its callees and
+// submit the discovered modules to the JIT.
+void ThinLtoDiscoveryThread::operator()() {
+  while (KeepRunning.load()) {
+    std::vector<unsigned> Indexes = Layer.takeFlagsThatFired();
+
+    if (!Indexes.empty()) {
+      LLVM_DEBUG(dbgs() << Indexes.size() << " new flags raised\n");
+      auto ReachedFunctions = Layer.takeFlagOwners(std::move(Indexes));
+
+      for (GlobalValue::GUID F : ReachedFunctions) {
+        if (GlobalValueSummary *S = GlobalIndex.getSummary(F)) {
+          assert(isa<FunctionSummary>(S) && "Reached symbols are functions");
+          GlobalIndex.discoverCalleeModulePaths(cast<FunctionSummary>(S),
+                                                LookaheadLevels);
+        } else {
+          LLVM_DEBUG(dbgs() << "No summary for GUID: " << F << "\n");
+        }
+      }
+
+      if (GlobalIndex.getNumDiscoveredModules() > 0)
+        spawnLookupForHighRankModules();
+    }
+  }
+}
+
+// Schedule parsing for the next batch of discovered modules and add them to
+// the JIT from a detached helper thread.
+void ThinLtoDiscoveryThread::spawnLookupForHighRankModules() {
+  std::vector<std::string> Paths = GlobalIndex.selectNextPaths();
+  GlobalIndex.scheduleModuleParsing(Paths);
+
+  // In order to add modules we need exclusive access to the execution session.
+  // The outer lambda is mutable, so the paths get moved (and not copied) into
+  // the session-locked callback.
+  // NOTE(review): The detached thread captures 'this'. Presumably the worker
+  // outlives all helper threads; confirm against the owner's shutdown order.
+  std::thread([this, Paths = std::move(Paths)]() mutable {
+    ES.runSessionLocked([this, Paths = std::move(Paths)]() mutable {
+      for (const std::string &Path : Paths) {
+        ThreadSafeModule TSM = GlobalIndex.takeModule(Path);
+        if (!TSM)
+          // In the meantime the module was added synchronously.
+          continue;
+
+        if (Error LoadErr = AddModule(std::move(TSM))) {
+          // Failed to add the module to the session. Don't count it as
+          // submitted.
+          ES.reportError(std::move(LoadErr));
+          continue;
+        }
+
+        ++NumModulesSubmitted;
+      }
+    });
+  }).detach();
+}
+
+} // namespace orc
+} // namespace llvm
diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.h
@@ -0,0 +1,79 @@
+#ifndef LLVM_EXAMPLES_THINLTOJIT_DISCOVERYLAYER_H
+#define LLVM_EXAMPLES_THINLTOJIT_DISCOVERYLAYER_H
+
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/Core.h"
+#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
+#include "llvm/ExecutionEngine/Orc/Layer.h"
+#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "ThinLtoJIT.h"
+
+#include <atomic>
+#include <cstdint>
+#include <map>
+#include <mutex>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+
+/// IR layer that prepends a discovery-flag store to each function definition
+/// and forwards the module to the base compile layer.
+class ThinLtoInstrumentationLayer : public IRLayer {
+public:
+  ThinLtoInstrumentationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer,
+                              ThinLtoJIT::ExplicitMemoryBarrier MemFence,
+                              unsigned FlagsPerBucket)
+      : IRLayer(ES, BaseLayer.getManglingOptions()), BaseLayer(BaseLayer),
+        MemFence(MemFence) {
+    // TODO: So far we only allocate one bucket.
+    allocateDiscoveryFlags(FlagsPerBucket);
+  }
+
+  ~ThinLtoInstrumentationLayer() override;
+
+  void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override;
+
+  unsigned reserveDiscoveryFlags(unsigned Count);
+  void registerDiscoveryFlagOwners(std::vector<GlobalValue::GUID> Guids,
+                                   unsigned FirstIdx);
+
+  void nudgeIntoDiscovery(std::vector<GlobalValue::GUID> Functions);
+
+  std::vector<unsigned> takeFlagsThatFired();
+  std::vector<GlobalValue::GUID> takeFlagOwners(std::vector<unsigned> Indexes);
+
+  void dump(raw_ostream &OS);
+
+private:
+  IRCompileLayer &BaseLayer;
+  ThinLtoJIT::ExplicitMemoryBarrier MemFence;
+
+  enum Flag : uint8_t { Clear = 0, Fired = 1 };
+
+  // Lock-free read access.
+  uint8_t *FlagsStorage;
+  Flag *FlagsIncoming; // lock-free write by design
+  Flag *FlagsHandled;
+  unsigned NumFlagsAllocated;
+  std::atomic<unsigned> NumFlagsUsed; // spin-lock
+
+  // Acquire/release sync between writers and reader
+  std::atomic<uint64_t> FlagsSync;
+
+  // STL container requires locking for both, read and write access.
+  mutable std::mutex DiscoveryFlagsInfoLock;
+  std::map<unsigned, GlobalValue::GUID> FlagOwnersMap;
+
+  void allocateDiscoveryFlags(unsigned MinFlags);
+  void compileFunctionReachedFlagSetter(IRBuilder<> &B, Flag *F);
+};
+
+} // namespace orc
+} // namespace llvm
+
+#endif
diff --git a/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/ThinLtoInstrumentationLayer.cpp
@@ -0,0 +1,262 @@
+#include "ThinLtoInstrumentationLayer.h"
+
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Process.h"
+
+#include <cstdlib>
+
+#define DEBUG_TYPE "thinltojit"
+
+namespace llvm {
+namespace orc {
+
+// Allocate zero-initialized, page-aligned storage for at least MinFlags
+// discovery flags. Aborts with a fatal error if the allocation fails.
+// TODO: Fixed set of flags may not always be enough. Make this expandable.
+void ThinLtoInstrumentationLayer::allocateDiscoveryFlags(unsigned MinFlags) {
+  // Round up to full memory pages.
+  unsigned PageSize = sys::Process::getPageSizeEstimate();
+  unsigned NumPagesEach = (MinFlags + (PageSize - 1)) / PageSize;
+  unsigned NumPagesTotal = 2 * NumPagesEach;
+  assert(isPowerOf2_64(PageSize) && "Adjust aligned memory alloc below");
+
+  // Allocate one more page to make up for size loss due to alignment.
+  void *Storage = std::calloc(NumPagesTotal + 1, PageSize);
+  if (!Storage)
+    report_bad_alloc_error("Failed to allocate discovery flags");
+
+  uint64_t StorageAddr = reinterpret_cast<uint64_t>(Storage);
+  uint64_t PageSizeDecr = PageSize - 1;
+  uint64_t AlignedAddr = ((StorageAddr + PageSizeDecr) & ~PageSizeDecr);
+  uint64_t Diff = AlignedAddr - StorageAddr;
+
+  // For each flag we allocate one byte in each location: Incoming and Handled.
+  // TODO: 'Handled' could be a bitset, but size must be dynamic
+  NumFlagsUsed.store(0);
+  NumFlagsAllocated = NumPagesEach * PageSize;
+  FlagsStorage = static_cast<uint8_t *>(Storage);
+  FlagsIncoming = reinterpret_cast<Flag *>(FlagsStorage + Diff);
+  FlagsHandled = FlagsIncoming + NumFlagsAllocated;
+
+  static_assert(sizeof(FlagsIncoming[0]) == sizeof(uint8_t), "Flags are bytes");
+  assert(reinterpret_cast<uint64_t>(FlagsIncoming) % PageSize == 0);
+  assert(reinterpret_cast<uint64_t>(FlagsHandled) % PageSize == 0);
+  assert(NumFlagsAllocated >= MinFlags);
+}
+
+// Reserve a new set of discovery flags and return the index of the first one.
+// Aborts with a fatal error once the fixed-size flag storage is exhausted.
+unsigned ThinLtoInstrumentationLayer::reserveDiscoveryFlags(unsigned Count) {
+  assert(Count > 0);
+  unsigned FirstIdx = NumFlagsUsed.fetch_add(Count);
+
+  // The storage is not expandable yet. Handing out indexes past its end would
+  // make writers (including JITed code) corrupt unrelated memory.
+  if (Count > NumFlagsAllocated || FirstIdx > NumFlagsAllocated - Count)
+    report_fatal_error("Out of discovery flags");
+
+#ifndef NDEBUG
+  // Check the range we actually own now. Reading the counter a second time
+  // could race with concurrent reservations.
+  for (unsigned i = FirstIdx, e = FirstIdx + Count; i < e; i++) {
+    assert(FlagsIncoming[i] == Clear);
+  }
+#endif
+
+  return FirstIdx;
+}
+
+// Record which function GUID owns each of the flags starting at FirstIdx.
+void ThinLtoInstrumentationLayer::registerDiscoveryFlagOwners(
+    std::vector<GlobalValue::GUID> Guids, unsigned FirstIdx) {
+  unsigned Count = Guids.size();
+
+  std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock);
+  for (unsigned i = 0; i < Count; i++) {
+    assert(!FlagOwnersMap.count(FirstIdx + i) &&
+           "Flag should not have an owner at this point");
+    FlagOwnersMap[FirstIdx + i] = Guids[i];
+  }
+}
+
+// Return the indexes of all flags that fired since the last call and mark
+// them as handled.
+std::vector<unsigned> ThinLtoInstrumentationLayer::takeFlagsThatFired() {
+  // This is only effective with the respective Release.
+  FlagsSync.load(std::memory_order_acquire);
+
+  std::vector<unsigned> Indexes;
+  unsigned NumIndexesUsed = NumFlagsUsed.load();
+  for (unsigned i = 0; i < NumIndexesUsed; i++) {
+    if (FlagsIncoming[i] == Fired && FlagsHandled[i] == Clear) {
+      FlagsHandled[i] = Fired;
+      Indexes.push_back(i);
+    }
+  }
+
+  return Indexes;
+}
+
+// Look up and remove the owners of the given flags. Every index must have an
+// owner registered.
+std::vector<GlobalValue::GUID>
+ThinLtoInstrumentationLayer::takeFlagOwners(std::vector<unsigned> Indexes) {
+  std::vector<GlobalValue::GUID> ReachedFunctions;
+  std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock);
+
+  for (unsigned i : Indexes) {
+    auto KV = FlagOwnersMap.find(i);
+    assert(KV != FlagOwnersMap.end());
+    ReachedFunctions.push_back(KV->second);
+    FlagOwnersMap.erase(KV);
+  }
+
+  return ReachedFunctions;
+}
+
+// Reserve flags for the given functions and raise them right away, so the
+// discovery thread treats the functions as reached. Functions must not be
+// empty.
+void ThinLtoInstrumentationLayer::nudgeIntoDiscovery(
+    std::vector<GlobalValue::GUID> Functions) {
+  unsigned Count = Functions.size();
+
+  // Registering synthetic flags in advance. We expect them to get processed
+  // before the respective functions get emitted. If not, the emit() function
+  // still instruments them with fresh flags of their own.
+  unsigned FirstFlagIdx = reserveDiscoveryFlags(Count);
+  registerDiscoveryFlagOwners(std::move(Functions), FirstFlagIdx);
+
+  // Initialize the flags as fired and force a cache sync, so discovery will
+  // pick them up as soon as possible.
+  for (unsigned i = FirstFlagIdx; i < FirstFlagIdx + Count; i++) {
+    FlagsIncoming[i] = Fired;
+  }
+  if (MemFence & ThinLtoJIT::FenceStaticCode) {
+    FlagsSync.store(0, std::memory_order_release);
+  }
+
+  LLVM_DEBUG(dbgs() << "Nudged " << Count << " new functions into discovery\n");
+}
+
+// Instrument all function definitions in the module with a discovery flag
+// setter, register the flag owners and pass the module on to the base layer.
+void ThinLtoInstrumentationLayer::emit(MaterializationResponsibility R,
+                                       ThreadSafeModule TSM) {
+  TSM.withModuleDo([this](Module &M) {
+    std::vector<Function *> FunctionsToInstrument;
+
+    // We may have discovered ahead of some functions already, but we still
+    // instrument them all. Their notifications steer the future direction of
+    // discovery.
+    for (Function &F : M.getFunctionList())
+      if (!F.isDeclaration())
+        FunctionsToInstrument.push_back(&F);
+
+    if (!FunctionsToInstrument.empty()) {
+      IRBuilder<> B(M.getContext());
+      std::vector<GlobalValue::GUID> NewDiscoveryRoots;
+
+      // Flags that fire must have owners registered. We will do it below and
+      // that's fine, because they can only be reached once the code is emitted.
+      unsigned FirstFlagIdx =
+          reserveDiscoveryFlags(FunctionsToInstrument.size());
+
+      unsigned NextFlagIdx = FirstFlagIdx;
+      for (Function *F : FunctionsToInstrument) {
+        // TODO: Emitting the write operation into an indirection stub would
+        // allow to skip it once we got the notification.
+        BasicBlock *E = &F->getEntryBlock();
+        B.SetInsertPoint(BasicBlock::Create(
+            M.getContext(), "NotifyFunctionReachedProlog", F, E));
+        compileFunctionReachedFlagSetter(B, FlagsIncoming + NextFlagIdx);
+        B.CreateBr(E);
+
+        std::string GlobalName = GlobalValue::getGlobalIdentifier(
+            F->getName(), F->getLinkage(), M.getSourceFileName());
+        NewDiscoveryRoots.push_back(GlobalValue::getGUID(GlobalName));
+        ++NextFlagIdx;
+      }
+
+      LLVM_DEBUG(dbgs() << "Instrumented " << NewDiscoveryRoots.size()
+                        << " new functions in module " << M.getName() << "\n");
+
+      // Submit owner info, so the DiscoveryThread can evaluate the flags.
+      registerDiscoveryFlagOwners(std::move(NewDiscoveryRoots), FirstFlagIdx);
+    }
+  });
+
+  BaseLayer.emit(std::move(R), std::move(TSM));
+}
+
+// Emit IR at the builder's insert point that stores 'Fired' to the flag at
+// address F, followed by a release store to FlagsSync if JITed code is fenced.
+void ThinLtoInstrumentationLayer::compileFunctionReachedFlagSetter(
+    IRBuilder<> &B, Flag *F) {
+  assert(*F == Clear);
+  Type *Int64Ty = Type::getInt64Ty(B.getContext());
+
+  // Write one immediate 8bit value to a fixed location in memory.
+  auto FlagAddr = pointerToJITTargetAddress(F);
+  Type *FlagTy = Type::getInt8Ty(B.getContext());
+  B.CreateStore(ConstantInt::get(FlagTy, Fired),
+                B.CreateIntToPtr(ConstantInt::get(Int64Ty, FlagAddr),
+                                 FlagTy->getPointerTo()));
+
+  if (MemFence & ThinLtoJIT::FenceJITedCode) {
+    // Overwrite the sync value with Release ordering. The discovery thread
+    // reads it with Acquire ordering. The actual value doesn't matter.
+    static constexpr bool IsVolatile = true;
+    static constexpr Instruction *NoInsertBefore = nullptr;
+    auto SyncFlagAddr = pointerToJITTargetAddress(&FlagsSync);
+
+    // Only promise the alignment the member really has. Overestimating the
+    // alignment of a store is undefined behavior in LLVM IR.
+    B.Insert(
+        new StoreInst(ConstantInt::get(Int64Ty, 0),
+                      B.CreateIntToPtr(ConstantInt::get(Int64Ty, SyncFlagAddr),
+                                       Int64Ty->getPointerTo()),
+                      IsVolatile, MaybeAlign(alignof(decltype(FlagsSync))),
+                      AtomicOrdering::Release, SyncScope::System,
+                      NoInsertBefore));
+  }
+}
+
+// Print statistics about allocated, issued and fired discovery flags and the
+// number of flag owners that were never taken.
+void ThinLtoInstrumentationLayer::dump(raw_ostream &OS) {
+  OS << "Discovery flags stats\n";
+
+  unsigned NumFlagsFired = 0;
+  for (unsigned i = 0; i < NumFlagsAllocated; i++) {
+    if (FlagsIncoming[i] == Fired)
+      ++NumFlagsFired;
+  }
+  // The values are unsigned, so print them with "%6u". A zero precision as in
+  // "%6.d" would print no digits at all for a count of zero.
+  OS << "Alloc: " << format("%6u", NumFlagsAllocated) << "\n";
+  OS << "Issued: " << format("%6u", NumFlagsUsed.load()) << "\n";
+  OS << "Fired: " << format("%6u", NumFlagsFired) << "\n";
+
+  // The owners map requires locking for read access, too.
+  unsigned RemainingFlagOwners;
+  {
+    std::lock_guard<std::mutex> Lock(DiscoveryFlagsInfoLock);
+    RemainingFlagOwners = FlagOwnersMap.size();
+  }
+  OS << "\nFlagOwnersMap has " << RemainingFlagOwners
+     << " remaining entries.\n";
+}
+
+ThinLtoInstrumentationLayer::~ThinLtoInstrumentationLayer() {
+  std::free(FlagsStorage);
+}
+
+} // namespace orc
+} // namespace llvm
+diff --git 
a/llvm/examples/ThinLtoJIT/ThinLtoJIT.h b/llvm/examples/ThinLtoJIT/ThinLtoJIT.h new file mode 100644 --- /dev/null +++ b/llvm/examples/ThinLtoJIT/ThinLtoJIT.h @@ -0,0 +1,123 @@ +#ifndef LLVM_EXAMPLES_THINLTOJIT_THINLTOJIT_H +#define LLVM_EXAMPLES_THINLTOJIT_THINLTOJIT_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" +#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" + +#include +#include +#include +#include +#include + +namespace llvm { +namespace orc { + +class ThinLtoDiscoveryThread; +class ThinLtoInstrumentationLayer; +class ThinLtoModuleIndex; + +class CompileOnDemandLayer; +class IRCompileLayer; +class RTDyldObjectLinkingLayer; + +class JITDylib; +class LazyCallThroughManager; +class MangleAndInterner; + +class ThinLtoJIT { +public: + using AddModuleFunction = std::function; + + enum ExplicitMemoryBarrier { + NeverFence = 0, + FenceStaticCode = 1, + FenceJITedCode = 2, + AlwaysFence = 3 + }; + + ThinLtoJIT(ArrayRef InputFiles, StringRef MainFunctionName, + unsigned LookaheadLevels, unsigned NumCompileThreads, + unsigned NumLoadThreads, unsigned DiscoveryFlagsPerBucket, + ExplicitMemoryBarrier MemFence, bool AllowNudgeIntoDiscovery, + bool PrintStats, Error &Err); + ~ThinLtoJIT(); + + ThinLtoJIT(const ThinLtoJIT &) = delete; + ThinLtoJIT &operator=(const ThinLtoJIT &) = delete; + ThinLtoJIT(ThinLtoJIT &&) = delete; + ThinLtoJIT &operator=(ThinLtoJIT &&) = delete; + + Expected main(ArrayRef Args) { + auto MainSym = ES.lookup({MainJD}, MainFunctionMangled); + if (!MainSym) + return MainSym.takeError(); + + using MainFn = int(int, char *[]); + auto Main = jitTargetAddressToFunction(MainSym->getAddress()); + + return runAsMain(Main, Args, StringRef("ThinLtoJIT")); + } + +private: + 
ExecutionSession ES; + std::unique_ptr DL; + + // Local convenience class to allow late construction of the mangler while + // preserving the conventional Mangle(SymbolName) syntax. + struct MangleWrapper { + SymbolStringPtr operator()(StringRef S) { return Impl->operator()(S); } + char getGlobalPrefix() { return DL->getGlobalPrefix(); } + MangleAndInterner &getImpl() { return *Impl.get(); } + void init(ExecutionSession &ES, Module *M) { + DL = std::make_unique(M); + Impl = std::make_unique(ES, *DL); + } + std::unique_ptr Impl{nullptr}; + std::unique_ptr DL{nullptr}; + }; + + MangleWrapper Mangle; + + JITDylib *MainJD; + SymbolStringPtr MainFunctionMangled; + std::unique_ptr CompileThreads; + std::unique_ptr GlobalIndex; + + AddModuleFunction AddModule; + AddModuleFunction AddModuleAndLookup; + std::unique_ptr ObjLinkingLayer; + std::unique_ptr CompileLayer; + std::unique_ptr InstrumentationLayer; + std::unique_ptr OnDemandLayer; + + std::atomic JitRunning; + std::thread DiscoveryThread; + std::unique_ptr DiscoveryThreadWorker; + std::unique_ptr CallThroughManager; + + Error setupLayers(Triple TT, unsigned DiscoveryFlagsPerBucket, + ExplicitMemoryBarrier MemFence); + Error setupJITDylib(JITDylib *JD, bool AllowNudge, bool PrintStats); + Error setupDiscovery(JITDylib *MainJD, unsigned NumCompileThreads, + unsigned NumLoadThreads, unsigned LookaheadLevels, + bool PrintStats); + Expected setupMainModule(StringRef MainFunction); + + static void exitOnLazyCallThroughFailure() { + errs() << "Compilation failed. 
Aborting.\n"; + exit(1); + } +}; + +} // namespace orc +} // namespace llvm + +#endif diff --git a/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp b/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp new file mode 100644 --- /dev/null +++ b/llvm/examples/ThinLtoJIT/ThinLtoJIT.cpp @@ -0,0 +1,305 @@ +#include "ThinLtoJIT.h" + +#include "llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h" +#include "llvm/ExecutionEngine/Orc/CompileUtils.h" +#include "llvm/ExecutionEngine/Orc/ExecutionUtils.h" +#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" +#include "llvm/ExecutionEngine/Orc/JITTargetMachineBuilder.h" +#include "llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Support/Debug.h" + +#include "ThinLtoDiscoveryThread.h" +#include "ThinLtoInstrumentationLayer.h" +#include "ThinLtoModuleIndex.h" + +#include +#include +#include + +#ifndef NDEBUG +#include +#endif + +#define DEBUG_TYPE "thinltojit" + +namespace llvm { +namespace orc { + +class ThinLtoDefinitionGenerator : public JITDylib::DefinitionGenerator { +public: + ThinLtoDefinitionGenerator(ThinLtoModuleIndex &GlobalIndex, + ThinLtoInstrumentationLayer &InstrumentationLayer, + ThinLtoJIT::AddModuleFunction AddModule, + char Prefix, bool AllowNudge, bool PrintStats) + : GlobalIndex(GlobalIndex), InstrumentationLayer(InstrumentationLayer), + AddModule(std::move(AddModule)), ManglePrefix(Prefix), + AllowNudgeIntoDiscovery(AllowNudge), PrintStats(PrintStats) {} + + ~ThinLtoDefinitionGenerator() { + if (PrintStats) + dump(errs()); + } + + Error tryToGenerate(LookupKind K, JITDylib &JD, + JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) override; + + void dump(raw_ostream &OS) { + OS << format("Modules submitted synchronously: %d\n", NumModulesMissed); + } + +private: + ThinLtoModuleIndex &GlobalIndex; + ThinLtoInstrumentationLayer &InstrumentationLayer; + ThinLtoJIT::AddModuleFunction 
AddModule; + char ManglePrefix; + bool AllowNudgeIntoDiscovery; + bool PrintStats; + unsigned NumModulesMissed{0}; + + // ThinLTO summaries encode unprefixed names. + StringRef stripGlobalManglePrefix(StringRef Symbol) const { + bool Strip = (ManglePrefix != '\0' && Symbol[0] == ManglePrefix); + return Strip ? StringRef(Symbol.data() + 1, Symbol.size() - 1) : Symbol; + } +}; + +Error ThinLtoDefinitionGenerator::tryToGenerate( + LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, + const SymbolLookupSet &Symbols) { + std::set ModulePaths; + std::vector NewDiscoveryRoots; + + for (const auto &KV : Symbols) { + StringRef UnmangledName = stripGlobalManglePrefix(*KV.first); + auto Guid = GlobalValue::getGUID(UnmangledName); + if (GlobalValueSummary *S = GlobalIndex.getSummary(Guid)) { + // We could have discovered it ahead of time. + LLVM_DEBUG(dbgs() << format("Failed to discover symbol: %s\n", + UnmangledName.str().c_str())); + ModulePaths.insert(S->modulePath()); + if (AllowNudgeIntoDiscovery && isa(S)) { + NewDiscoveryRoots.push_back(Guid); + } + } + } + + NumModulesMissed += ModulePaths.size(); + + // Parse the requested modules if it hasn't happened yet. + GlobalIndex.scheduleModuleParsing(ModulePaths); + + for (StringRef Path : ModulePaths) { + ThreadSafeModule TSM = GlobalIndex.takeModule(Path); + assert(TSM && "We own the session lock, no asynchronous access possible"); + + if (Error LoadErr = AddModule(std::move(TSM))) + // Failed to add the module to the session. + return LoadErr; + + LLVM_DEBUG(dbgs() << "Generator: added " << Path << " synchronously\n"); + } + + // Requested functions that we failed to discover ahead of time, are likely + // close to the execution front. We can anticipate to run into them as soon + // as execution continues and trigger their discovery flags already now. This + // behavior is enabled with the 'allow-nudge' option and implemented below. 
+ // On the one hand, it may give us a head start in a moment where discovery + // was lacking behind. On the other hand, we may bet on the wrong horse and + // waste extra time speculating in the wrong direction. + if (!NewDiscoveryRoots.empty()) { + assert(AllowNudgeIntoDiscovery); + InstrumentationLayer.nudgeIntoDiscovery(std::move(NewDiscoveryRoots)); + } + + return Error::success(); +} + +ThinLtoJIT::ThinLtoJIT(ArrayRef InputFiles, + StringRef MainFunctionName, unsigned LookaheadLevels, + unsigned NumCompileThreads, unsigned NumLoadThreads, + unsigned DiscoveryFlagsPerBucket, + ExplicitMemoryBarrier MemFence, + bool AllowNudgeIntoDiscovery, bool PrintStats, + Error &Err) { + ErrorAsOutParameter ErrAsOutParam(&Err); + + GlobalIndex = std::make_unique(ES, NumLoadThreads); + for (StringRef F : InputFiles) { + if (auto Err = GlobalIndex->add(F)) + ES.reportError(std::move(Err)); + } + + auto TSM = setupMainModule(MainFunctionName); + if (!TSM) { + Err = TSM.takeError(); + return; + } + if (!*TSM) { + Err = createStringError(inconvertibleErrorCode(), + "Failed to setup main module"); + return; + } + + ThreadSafeModule MainModule = std::move(*TSM); + Module *RawModule = MainModule.getModuleUnlocked(); + + // Now that we know the target data layout we can setup the mangler. 
+ Mangle.init(ES, RawModule); + MainFunctionMangled = Mangle(MainFunctionName); + + Err = setupLayers(Triple(RawModule->getTargetTriple()), + DiscoveryFlagsPerBucket, MemFence); + if (Err) + return; + + MainJD = &ES.createJITDylib("main"); + Err = setupJITDylib(MainJD, AllowNudgeIntoDiscovery, PrintStats); + if (Err) + return; + + Err = setupDiscovery(MainJD, NumCompileThreads, NumLoadThreads, + LookaheadLevels, PrintStats); + if (Err) + return; + + Err = AddModule(std::move(MainModule)); + if (Err) + return; + + if (AllowNudgeIntoDiscovery) { + auto MainFunctionGuid = GlobalValue::getGUID(MainFunctionName); + InstrumentationLayer->nudgeIntoDiscovery({MainFunctionGuid}); + } + +#ifndef NDEBUG + // Uncomment to give the discovery thread some time do things. + // std::this_thread::sleep_for(std::chrono::milliseconds(1000)); +#endif +} + +Expected ThinLtoJIT::setupMainModule(StringRef MainFunction) { + Optional M = GlobalIndex->getModulePathForSymbol(MainFunction); + if (!M) { + std::string Buffer; + raw_string_ostream OS(Buffer); + OS << "No ValueInfo for symbol '" << MainFunction; + OS << "' in provided modules: "; + for (StringRef P : GlobalIndex->getAllModulePaths()) + OS << P << " "; + OS << "\n"; + return createStringError(inconvertibleErrorCode(), OS.str()); + } + + return GlobalIndex->parseModuleFromFile(*M); +} + +Error ThinLtoJIT::setupLayers(Triple TT, unsigned DiscoveryFlagsPerBucket, + ExplicitMemoryBarrier MemFence) { + ObjLinkingLayer = std::make_unique( + ES, []() { return std::make_unique(); }); + CompileLayer = std::make_unique( + ES, *ObjLinkingLayer, std::make_unique(JITTargetMachineBuilder(TT))); + + InstrumentationLayer = std::make_unique( + ES, *CompileLayer, MemFence, DiscoveryFlagsPerBucket); + + auto ISMB = createLocalIndirectStubsManagerBuilder(TT); + auto LCTM = createLocalLazyCallThroughManager( + TT, ES, pointerToJITTargetAddress(exitOnLazyCallThroughFailure)); + if (!LCTM) + return LCTM.takeError(); + + CallThroughManager = 
std::move(*LCTM); + OnDemandLayer = std::make_unique( + ES, *InstrumentationLayer, *CallThroughManager, std::move(ISMB)); + // Don't break up modules. Insert stubs on module boundaries. + OnDemandLayer->setPartitionFunction(CompileOnDemandLayer::compileWholeModule); + + AddModule = [this](ThreadSafeModule TSM) -> Error { + assert(MainJD && "Setup MainJD JITDylib before calling"); + StringRef Path = TSM.getModuleUnlocked()->getName(); + VModuleKey Id = GlobalIndex->getModuleId(Path); + return OnDemandLayer->add(*MainJD, std::move(TSM), Id); + }; + + return Error::success(); +} + +static bool IsTrivialModule(MaterializationUnit *MU) { + StringRef ModuleName = MU->getName(); + return ModuleName == "" || ModuleName == "" || + ModuleName == ""; +} + +Error ThinLtoJIT::setupDiscovery(JITDylib *MainJD, unsigned NumCompileThreads, + unsigned NumLoadThreads, + unsigned LookaheadLevels, bool PrintStats) { + // Delegate compilation to the thread pool. + CompileThreads = std::make_unique(NumCompileThreads); + ES.setDispatchMaterialization( + [this](JITDylib &JD, std::unique_ptr MU) { + if (IsTrivialModule(MU.get())) { + // This should be quick. And we maybe save a few session locks? + MU->doMaterialize(JD); + } else { + // FIXME: Drop the std::shared_ptr workaround once ThreadPool::async() + // accepts llvm::unique_function to define jobs. + auto SharedMU = std::shared_ptr(std::move(MU)); + CompileThreads->async([MU = std::move(SharedMU), &JD]() { + MU->doMaterialize(JD); + }); + } + }); + +#ifndef NDEBUG + // Uncomment to avoid discovering all at once when debugging small examples. + // LookaheadLevels = 1; +#endif + + // Spawn discovery thread and let it add newly discovered modules to the JIT. 
+ JitRunning.store(true); + DiscoveryThreadWorker = std::make_unique( + JitRunning, ES, MainJD, *InstrumentationLayer, *GlobalIndex, AddModule, + LookaheadLevels, NumLoadThreads, Mangle.getImpl(), PrintStats); + + DiscoveryThread = std::thread(std::ref(*DiscoveryThreadWorker)); + return Error::success(); +} + +Error +ThinLtoJIT::setupJITDylib(JITDylib *JD, bool AllowNudge, bool PrintStats) { + // Register symbols for C++ static destructors. + LocalCXXRuntimeOverrides CXXRuntimeoverrides; + Error Err = CXXRuntimeoverrides.enable(*JD, *Mangle.Impl); + if (Err) + return Err; + + // Lookup symbol names in the global ThinLTO module index first + char Prefix = Mangle.getGlobalPrefix(); + JD->addGenerator(std::make_unique( + *GlobalIndex, *InstrumentationLayer, AddModule, Prefix, AllowNudge, + PrintStats)); + + // Then try lookup in the host process. + auto HostLookup = DynamicLibrarySearchGenerator::GetForCurrentProcess(Prefix); + if (!HostLookup) + return HostLookup.takeError(); + JD->addGenerator(std::move(*HostLookup)); + + return Error::success(); +} + +ThinLtoJIT::~ThinLtoJIT() { + // Signal the DiscoveryThread to shut down. + JitRunning.store(false); + DiscoveryThread.join(); + + // Wait for potential compile actions to finish. 
+ CompileThreads->wait(); +} + +} // namespace orc +} // namespace llvm diff --git a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h new file mode 100644 --- /dev/null +++ b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.h @@ -0,0 +1,94 @@ +#ifndef LLVM_EXAMPLES_THINLTOJIT_THINLTOJITMODULEINDEX_H +#define LLVM_EXAMPLES_THINLTOJIT_THINLTOJITMODULEINDEX_H + +#include "llvm/ADT/Optional.h" +#include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/ThreadSafeModule.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/ModuleSummaryIndex.h" +#include "llvm/Support/Error.h" +#include "llvm/Support/ThreadPool.h" + +#include +#include +#include +#include +#include + +namespace llvm { +namespace orc { + +class SymbolStringPtr; + +class ThinLtoModuleIndex { + static constexpr bool HaveGVs = false; + +public: + ThinLtoModuleIndex(ExecutionSession &ES, unsigned ParseModuleThreads) + : ES(ES), CombinedSummaryIndex(HaveGVs), + ParseModuleWorkers(ParseModuleThreads), + NumParseModuleThreads(ParseModuleThreads) {} + + Error add(StringRef InputPath); + GlobalValueSummary *getSummary(GlobalValue::GUID Function) const; + std::vector getAllModulePaths() const; + Optional getModulePathForSymbol(StringRef Name) const; + + template void scheduleModuleParsing(const RangeT &Paths); + ThreadSafeModule takeModule(StringRef Path); + + // Blocking module parsing, returns a Null-module on error. + // Only used for the main module. 
+ ThreadSafeModule parseModuleFromFile(StringRef Path); + + std::vector selectNextPaths(); + unsigned getNumDiscoveredModules() const { return PathRank.size(); } + void discoverCalleeModulePaths(FunctionSummary *S, unsigned LookaheadLevels); + + VModuleKey getModuleId(StringRef Path) const { + return CombinedSummaryIndex.getModuleId(Path); + } + +private: + ExecutionSession &ES; + ModuleSummaryIndex CombinedSummaryIndex; + uint64_t NextModuleId{0}; + + struct PathRankEntry { + uint32_t Count{0}; + uint32_t MinDist{100}; + }; + StringMap PathRank; + + ThreadPool ParseModuleWorkers; + unsigned NumParseModuleThreads; + + std::mutex ScheduledModulesLock; + StringMap> ScheduledModules; + + std::mutex ParsedModulesLock; + StringMap ParsedModules; + + void updatePathRank(StringRef Path, unsigned Distance); + void addToWorklist(std::vector &List, + ArrayRef Calls); + + std::vector selectAllPaths(); + std::vector selectHotPaths(unsigned Count); + + void scheduleModuleParsingPrelocked(StringRef Path); + Expected doParseModule(StringRef Path); +}; + +template +inline void ThinLtoModuleIndex::scheduleModuleParsing(const RangeT &Paths) { + std::lock_guard Lock(ScheduledModulesLock); + for (const auto &Path : Paths) { + scheduleModuleParsingPrelocked(Path); + } +} + +} // namespace orc +} // namespace llvm + +#endif diff --git a/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp new file mode 100644 --- /dev/null +++ b/llvm/examples/ThinLtoJIT/ThinLtoModuleIndex.cpp @@ -0,0 +1,255 @@ +#include "ThinLtoModuleIndex.h" + +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/ExecutionEngine/Orc/SymbolStringPool.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +#define DEBUG_TYPE "thinltojit" + +namespace llvm { +namespace orc { + +Error ThinLtoModuleIndex::add(StringRef InputPath) { + auto Buffer = 
+      errorOrToExpected(MemoryBuffer::getFile(InputPath));
+  if (!Buffer)
+    return Buffer.takeError();
+
+  // Read this file's summary into the combined index, passing NextModuleId
+  // as the module id.
+  Error ParseErr = readModuleSummaryIndex((*Buffer)->getMemBufferRef(),
+                                          CombinedSummaryIndex, NextModuleId);
+  if (ParseErr)
+    return ParseErr;
+
+  // Debug builds only: verify that no module path occurs twice in the
+  // combined index (getSummary() relies on this).
+#ifndef NDEBUG
+  auto Paths = getAllModulePaths();
+  unsigned TotalPaths = Paths.size();
+  std::sort(Paths.begin(), Paths.end());
+  Paths.erase(std::unique(Paths.begin(), Paths.end()), Paths.end());
+  assert(TotalPaths == Paths.size() && "Module paths must be unique");
+#endif
+
+  ++NextModuleId;
+  return Error::success();
+}
+
+// Returns all module paths of the combined index. Each path is stored at the
+// position given by KV.second.first of its module-table entry -- presumably
+// the module id, as the assertion text suggests.
+std::vector ThinLtoModuleIndex::getAllModulePaths() const {
+  auto ModuleTable = CombinedSummaryIndex.modulePaths();
+
+  std::vector Paths;
+  Paths.resize(ModuleTable.size());
+
+  for (const auto &KV : ModuleTable) {
+    // Every slot must be written exactly once.
+    assert(Paths[KV.second.first].empty() && "IDs are unique and continuous");
+    Paths[KV.second.first] = KV.first();
+  }
+
+  return Paths;
+}
+
+// Looks up the summary for the given GUID in the combined index. Returns
+// nullptr when the index has no entry or the entry has an empty summary list.
+GlobalValueSummary *
+ThinLtoModuleIndex::getSummary(GlobalValue::GUID Function) const {
+  ValueInfo VI = CombinedSummaryIndex.getValueInfo(Function);
+  if (!VI || VI.getSummaryList().empty())
+    return nullptr;
+
+  // There can be more than one symbol with the same GUID, in the case of same-
+  // named locals in different but same-named source files that were compiled in
+  // their respective directories (so the source file name and resulting GUID is
+  // the same). We avoid this by checking that module paths are unique upon
+  // add().
+  //
+  // TODO: We can still get duplicates on symbols declared with
+  // attribute((weak)), a GNU extension supported by gcc and clang.
+  // We should support it by looking for a symbol in the current module
+  // or in the same module as the caller.
+  assert(VI.getSummaryList().size() == 1 && "Weak symbols not yet supported");
+
+  // Hand out the base object of the single summary.
+  return VI.getSummaryList().front().get()->getBaseObject();
+}
+
+// Resolves Name to its GUID and returns the module path of its summary, or
+// None when getSummary() finds nothing for it.
+Optional
+ThinLtoModuleIndex::getModulePathForSymbol(StringRef Name) const {
+  if (GlobalValueSummary *S = getSummary(GlobalValue::getGUID(Name)))
+    return S->modulePath();
+  return None; // We don't know the symbol.
+}
+
+// Queues a parse job for Path on ParseModuleWorkers unless one was queued
+// before. The caller must hold ScheduledModulesLock.
+void ThinLtoModuleIndex::scheduleModuleParsingPrelocked(StringRef Path) {
+  // Once the module was scheduled, we can call takeModule().
+  auto ScheduledIt = ScheduledModules.find(Path);
+  if (ScheduledIt != ScheduledModules.end())
+    return;
+
+  // The job receives its own std::string copy of the path. On success the
+  // parsed module is stored in ParsedModules under ParsedModulesLock; on
+  // failure the error is reported to the ExecutionSession and nothing is
+  // stored.
+  auto Worker = [this](std::string Path) {
+    if (auto TSM = doParseModule(Path)) {
+      std::lock_guard Lock(ParsedModulesLock);
+      ParsedModules[Path] = std::move(*TSM);
+
+      LLVM_DEBUG(dbgs() << "Finished parsing module: " << Path << "\n");
+    } else {
+      ES.reportError(TSM.takeError());
+    }
+  };
+
+  LLVM_DEBUG(dbgs() << "Schedule module for parsing: " << Path << "\n");
+  // Keep the job's future so takeModule() can wait for it.
+  ScheduledModules[Path] = ParseModuleWorkers.async(Worker, Path.str());
+}
+
+// Returns the parsed module for Path. If it is not in ParsedModules yet, waits
+// for the parse job stored in ScheduledModules. The two locks are never held
+// at the same time in this function.
+ThreadSafeModule ThinLtoModuleIndex::takeModule(StringRef Path) {
+  std::unique_lock ParseLock(ParsedModulesLock);
+
+  auto ParsedIt = ParsedModules.find(Path);
+  if (ParsedIt == ParsedModules.end()) {
+    // Release ParsedModulesLock before blocking; the worker needs it to
+    // publish its result.
+    ParseLock.unlock();
+
+    // The module is not ready, wait for the future we stored.
+    std::unique_lock ScheduleLock(ScheduledModulesLock);
+    auto ScheduledIt = ScheduledModules.find(Path);
+    assert(ScheduledIt != ScheduledModules.end() &&
+           "Don't call for unscheduled modules");
+    std::shared_future Future = ScheduledIt->getValue();
+    ScheduleLock.unlock();
+    Future.get();
+
+    // NOTE(review): if doParseModule() failed, the worker stored nothing in
+    // ParsedModules, so this lookup cannot succeed -- confirm how parse
+    // failures are meant to reach the caller.
+    ParseLock.lock();
+    ParsedIt = ParsedModules.find(Path);
+    assert(ParsedIt != ParsedModules.end() && "Must be ready now");
+  }
+
+  // We only add each module once. If it's not here anymore, we can skip it.
+  ThreadSafeModule TSM = std::move(ParsedIt->getValue());
+  // Leave an empty module in the map entry; the entry itself stays.
+  ParsedIt->getValue() = ThreadSafeModule();
+  return TSM;
+}
+
+// Blocking parse of a single module: schedules Path (unless it was scheduled
+// before) and then waits in takeModule() for the parsed module.
+ThreadSafeModule ThinLtoModuleIndex::parseModuleFromFile(StringRef Path) {
+  {
+    // Hold ScheduledModulesLock only while scheduling. takeModule() locks the
+    // same std::mutex when the module is not parsed yet; calling it with the
+    // lock still held would lock a non-recursive mutex twice on this thread.
+    std::lock_guard ScheduleLock(ScheduledModulesLock);
+    scheduleModuleParsingPrelocked(Path);
+  }
+  return takeModule(Path);
+}
+
+// Parses the IR file at Path into a context of its own and returns both as a
+// ThreadSafeModule. On failure returns a string error that contains the path
+// and the printed parser diagnostic.
+Expected ThinLtoModuleIndex::doParseModule(StringRef Path) {
+  // TODO: make a SMDiagnosticError class for this
+  SMDiagnostic Err;
+  auto Ctx = std::make_unique();
+  auto M = parseIRFile(Path, Err, *Ctx);
+  if (!M) {
+    std::string ErrDescription;
+    {
+      // Scoped so the stream is destroyed (and flushed into ErrDescription)
+      // before the string is read.
+      raw_string_ostream S(ErrDescription);
+      Err.print("ThinLtoJIT", S);
+    }
+    // StringRef::data() is not guaranteed to be null-terminated, so go through
+    // a std::string before handing the path to the printf-style formatter.
+    return createStringError(inconvertibleErrorCode(),
+                             "Failed to load module from file '%s' (%s)",
+                             Path.str().c_str(), ErrDescription.c_str());
+  }
+
+  return ThreadSafeModule(std::move(M), std::move(Ctx));
+}
+
+// Ranks the module paths of the functions reachable from S through up to
+// LookaheadLevels levels of call edges.
+//
+// We don't filter visited functions. Discovery will often be retriggered
+// from the middle of already visited functions and it aims to reach a little
+// further each time.
+void ThinLtoModuleIndex::discoverCalleeModulePaths(FunctionSummary *S,
+                                                   unsigned LookaheadLevels) {
+  // Populate initial worklist
+  std::vector Worklist;
+  addToWorklist(Worklist, S->calls());
+  unsigned Distance = 0;
+
+  // Distance is 1 for the direct callees of S and grows by one per level.
+  while (++Distance < LookaheadLevels) {
+    // Process current worklist and populate a new one.
+    std::vector NextWorklist;
+    for (FunctionSummary *F : Worklist) {
+      updatePathRank(F->modulePath(), Distance);
+      addToWorklist(NextWorklist, F->calls());
+    }
+    Worklist = std::move(NextWorklist);
+  }
+
+  // Process the last worklist without filling a new one
+  for (FunctionSummary *F : Worklist) {
+    updatePathRank(F->modulePath(), Distance);
+  }
+
+  // Drop known paths. This includes both, scheduled and parsed modules.
+  std::lock_guard Lock(ScheduledModulesLock);
+  for (const auto &KV : ScheduledModules) {
+    StringRef Path = KV.first();
+    PathRank.erase(Path);
+  }
+}
+
+// Appends to List the function summary behind each call edge in Calls. Only
+// the first entry of an edge's summary list is used; edges without any summary
+// are skipped.
+void ThinLtoModuleIndex::addToWorklist(
+    std::vector &List,
+    ArrayRef Calls) {
+  for (const auto &Edge : Calls) {
+    const auto &SummaryList = Edge.first.getSummaryList();
+    if (!SummaryList.empty()) {
+      GlobalValueSummary *S = SummaryList.front().get()->getBaseObject();
+      assert(isa(S) && "Callees must be functions");
+      List.push_back(cast(S));
+    }
+  }
+}
+
+// Records one more hit for Path and keeps the smallest Distance seen so far.
+// PathRank is global and continuous.
+void ThinLtoModuleIndex::updatePathRank(StringRef Path, unsigned Distance) {
+  auto &Entry = PathRank[Path];
+  Entry.Count += 1;
+  Entry.MinDist = std::min(Entry.MinDist, Distance);
+  // selectNextPaths() divides Count by MinDist.
+  assert(Entry.MinDist > 0 && "We want it as a divisor");
+}
+
+// Builds the list of module paths to load next: every PathRank entry is scored
+// as Count / MinDist, entries with a zero score are ignored.
+//
+// TODO: The size of a ThreadPool's task queue is not accessible. It would
+// be great to know in order to estimate how many modules we schedule. The
+// more we schedule, the less precise is the ranking. The less we schedule,
+// the higher the risk for downtime.
+std::vector ThinLtoModuleIndex::selectNextPaths() {
+  struct ScorePath {
+    float Score;
+    unsigned MinDist;
+    StringRef Path;
+  };
+
+  std::vector Candidates;
+  Candidates.reserve(PathRank.size());
+  for (const auto &KV : PathRank) {
+    float Score = static_cast(KV.second.Count) / KV.second.MinDist;
+    if (Score > .0f) {
+      Candidates.push_back({Score, KV.second.MinDist, KV.first()});
+    }
+  }
+
+  // Sort candidates by descending score.
+  std::sort(Candidates.begin(), Candidates.end(),
+            [](const ScorePath &LHS, const ScorePath &RHS) {
+              return LHS.Score > RHS.Score;
+            });
+
+  // Sort highest score candidates by ascending minimal distance.
+  // Take max(NumParseModuleThreads, half of the candidates), capped at the
+  // number of candidates.
+  size_t Selected = std::min(std::max(NumParseModuleThreads, Candidates.size() / 2), Candidates.size());
+  std::sort(Candidates.begin(), Candidates.begin() + Selected,
+            [](const ScorePath &LHS, const ScorePath &RHS) {
+              return LHS.MinDist < RHS.MinDist;
+            });
+
+  // Copy the selected paths out as owning strings.
+  std::vector Paths;
+  Paths.reserve(Selected);
+  for (unsigned i = 0; i < Selected; i++) {
+    Paths.push_back(Candidates[i].Path.str());
+  }
+
+  LLVM_DEBUG(dbgs() << "ModuleIndex: select " << Paths.size()
+                    << " out of " << Candidates.size() << " discovered paths\n");
+
+  return Paths;
+}
+
+} // namespace orc
+} // namespace llvm
diff --git a/llvm/examples/ThinLtoJIT/bench b/llvm/examples/ThinLtoJIT/bench
new file mode 100755
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/bench
@@ -0,0 +1,100 @@
+#!/bin/bash
+#set -x
+
+# Benchmark driver: compiles every *.c* file of a source directory to plain and
+# ThinLTO bitcode, then times a static build, several lli configurations,
+# SpeculativeJIT and ThinLtoJIT on the result.
+#
+# Arguments:
+#   $1  TOOLS_DIR         directory with the LLVM binaries
+#   $2  SOURCE_DIR        directory with the sources to compile
+#   $3  MAIN_SOURCE_FILE  source file whose basename names the main module
+#   $4  SYS_ROOT          optional, passed to clang as -isysroot (default "/")
+# CFLAGS, LDFLAGS and EXEC_ARGS are not set here; they are used as found in the
+# environment.
+# NOTE(review): the usage text below looks truncated (placeholders missing) --
+# confirm against the original revision.
+if [ $# -gt 2 ]; then
+  TOOLS_DIR="$1"
+  SOURCE_DIR="$2"
+  MAIN_SOURCE_FILE="$3"
+else
+  echo "Usage: bench []"
+  exit 1
+fi
+
+if [ $# -gt 3 ]; then
+  SYS_ROOT="$4"
+else
+  SYS_ROOT="/"
+fi
+
+# Prints whether the tool $1 exists in TOOLS_DIR. A missing tool is only
+# reported; the script keeps running.
+function check_tool ()
+{
+  if [ -e "${TOOLS_DIR}/$1" ]; then
+    echo "Found: $1"
+  else
+    echo "!!! Cannot find required tool, please provide it in the LLVM binaries folder: $1"
+  fi
+}
+
+check_tool lli
+check_tool SpeculativeJIT
+check_tool ThinLtoJIT
+
+# Bitcode generation is skipped as soon as any of the four output directories
+# already exists in the current directory.
+SKIP_BITCODE_GEN=0
+if [[ -e bc-default || -e bc-thinlto || -e ll-default || -e ll-thinlto ]]; then
+  echo "Skipping bitcode generation: output directories existing"
+  echo "Please clean up manually: rm -R bc-default bc-thinlto ll-default ll-thinlto"
+  SKIP_BITCODE_GEN=1
+else
+  check_tool clang
+  check_tool llvm-dis
+  check_tool llvm-lto
+  mkdir bc-default
+  mkdir bc-thinlto
+  mkdir ll-default
+  mkdir ll-thinlto
+fi
+
+ROOT_DIR=$(pwd)
+ALL_BITCODE_FILES=""
+
+MAIN_FILE_BASENAME=$(basename "${MAIN_SOURCE_FILE%.c*}")
+LLI_EXTRA_MODULES=""
+
+# The file lists are collected on every run, even when compilation is skipped.
+for f in ${SOURCE_DIR}/*.c* ; do
+  BASE_NAME=$(basename "${f%.c*}")
+
+  if [ ${SKIP_BITCODE_GEN} -eq 0 ]; then
+    echo "Compile: $f -> ${BASE_NAME}.bc"
+
+    ${TOOLS_DIR}/clang -c -I ${SOURCE_DIR} ${CFLAGS} -isysroot ${SYS_ROOT} -emit-llvm \
+                       -o "bc-default/${BASE_NAME}.bc" "$f"
+    ${TOOLS_DIR}/clang -c -I ${SOURCE_DIR} ${CFLAGS} -isysroot ${SYS_ROOT} -flto=thin \
+                       -o "bc-thinlto/${BASE_NAME}.bc" "$f"
+
+    echo "Disassemble ${BASE_NAME}.bc -> ${BASE_NAME}.ll"
+    ${TOOLS_DIR}/llvm-dis bc-default/${BASE_NAME}.bc -o ll-default/${BASE_NAME}.ll
+    ${TOOLS_DIR}/llvm-dis bc-thinlto/${BASE_NAME}.bc -o ll-thinlto/${BASE_NAME}.ll
+  fi
+
+  ALL_BITCODE_FILES="${ALL_BITCODE_FILES} ${BASE_NAME}.bc"
+  # Every module except the main one is passed to lli as -extra-module.
+  if [ "${BASE_NAME}" != "${MAIN_FILE_BASENAME}" ]; then
+    LLI_EXTRA_MODULES="${LLI_EXTRA_MODULES} -extra-module=${BASE_NAME}.bc"
+  fi
+done
+
+if [ ${SKIP_BITCODE_GEN} -eq 0 ]; then
+  echo "Link global index file: index.thinlto.bc"
+  cd ${ROOT_DIR}/bc-thinlto
+  ${TOOLS_DIR}/llvm-lto --thinlto -o ${ROOT_DIR}/bc-thinlto/index ${ALL_BITCODE_FILES}
+
+  echo "Disassemble global index file: index.thinlto.ll"
+  cd ${ROOT_DIR}/ll-thinlto
+  ${TOOLS_DIR}/llvm-dis -o index.thinlto.ll ${ROOT_DIR}/bc-thinlto/index.thinlto.bc
+fi
+
+# Timed runs on the plain bitcode; program output on stdout is discarded.
+set -x
+cd ${ROOT_DIR}/bc-default
+time (${TOOLS_DIR}/clang -o ${MAIN_FILE_BASENAME} -O0 ${LDFLAGS} ${ALL_BITCODE_FILES} && ./${MAIN_FILE_BASENAME} ${EXEC_ARGS} 1>/dev/null)
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=mcjit "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-mcjit "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -compile-threads=8 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 -O1 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/lli ${LLI_EXTRA_MODULES} -jit-kind=orc-lazy -per-module-lazy -compile-threads=8 -O0 "${MAIN_FILE_BASENAME}.bc" ${EXEC_ARGS} 1>/dev/null
+time ${TOOLS_DIR}/SpeculativeJIT -num-threads=8 ${ALL_BITCODE_FILES} --args ${EXEC_ARGS} 1>/dev/null
+
+# Timed run on the ThinLTO bitcode through the global index file.
+cd ${ROOT_DIR}/bc-thinlto
+#time (${TOOLS_DIR}/clang -flto=thin -o test ${ALL_BITCODE_FILES} && ./test ${EXEC_ARGS} 1>/dev/null)
+time ${TOOLS_DIR}/ThinLtoJIT index.thinlto.bc --args ${EXEC_ARGS} 1>/dev/null
diff --git a/llvm/examples/ThinLtoJIT/main.cpp b/llvm/examples/ThinLtoJIT/main.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/examples/ThinLtoJIT/main.cpp
@@ -0,0 +1,83 @@
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/TargetSelect.h"
+
+#include "ThinLtoJIT.h"
+
+// NOTE(review): the two #include directives below have no header name, the
+// cl::opt / cl::list declarations have no value type, and two cl::desc strings
+// are empty. Presumably angle-bracket text was lost when this patch was
+// rendered -- confirm against the original revision.
+#include
+#include
+
+using namespace llvm;
+
+// Positional input files; at least one is required.
+static cl::list InputFiles(cl::Positional, cl::OneOrMore,
+                           cl::desc(""));
+
+// Everything after "--args" is collected here and handed to Jit.main().
+static cl::list InputArgs("args", cl::Positional,
+                          cl::desc("..."),
+                          cl::ZeroOrMore, cl::PositionalEatsArgs);
+
+static cl::opt CompileThreads("compile-threads", cl::Optional,
+                              cl::desc("Number of compile threads"),
+                              cl::init(4));
+
+static cl::opt LoadThreads("load-threads", cl::Optional,
+                           cl::desc("Number of module load threads"),
+                           cl::init(8));
+
+static cl::opt
+    LookaheadLevels("lookahead", cl::Optional,
+                    cl::desc("Calls to look ahead of execution"), cl::init(4));
+
+static cl::opt DiscoveryFlagsBucketSize(
+    "discovery-flag-bucket-size", cl::Optional,
+    cl::desc("Flags per bucket (rounds up to memory pages)"), cl::init(4096));
+
+// Selects one of the ThinLtoJIT fence modes; defaults to AlwaysFence.
+static cl::opt
+    MemFence("mem-fence",
+             cl::desc("Control memory fences for cache synchronization"),
+             cl::init(orc::ThinLtoJIT::AlwaysFence),
+             cl::values(clEnumValN(orc::ThinLtoJIT::NeverFence, "never",
+                                   "No use of memory fences"),
+                        clEnumValN(orc::ThinLtoJIT::FenceStaticCode, "static",
+                                   "Use of memory fences in static code only"),
+                        clEnumValN(orc::ThinLtoJIT::FenceJITedCode, "jited",
+                                   "Install memory fences in JITed code only"),
+                        clEnumValN(orc::ThinLtoJIT::AlwaysFence, "always",
+                                   "Always use of memory fences")));
+
+static cl::opt
+    AllowNudge("allow-nudge",
+               cl::desc("Allow the symbol generator to nudge symbols into "
+                        "discovery even though they haven't been reached"),
+               cl::init(false));
+
+static cl::opt
+    PrintStats("print-stats",
+               cl::desc(""),
+               cl::init(false));
+
+// Entry point: initializes the native target, parses the command line,
+// constructs the ThinLtoJIT and runs the program's "main" through it.
+int main(int argc, char *argv[]) {
+  InitLLVM X(argc, argv);
+  InitializeNativeTarget();
+  InitializeNativeTargetAsmPrinter();
+  cl::ParseCommandLineOptions(argc, argv, "ThinLtoJIT");
+
+  // Err is passed to the ThinLtoJIT constructor and checked right after it.
+  Error Err = Error::success();
+  // Clamps the numeric options so that none of them is passed on as 0.
+  auto atLeastOne = [](unsigned N) { return std::max(1u, N); };
+
+  orc::ThinLtoJIT Jit(InputFiles, "main", atLeastOne(LookaheadLevels),
+                      atLeastOne(CompileThreads), atLeastOne(LoadThreads),
+                      DiscoveryFlagsBucketSize, MemFence, AllowNudge,
+                      PrintStats, Err);
+  if (Err) {
+    logAllUnhandledErrors(std::move(Err), errs(), "ThinLtoJIT: ");
+    exit(1);
+  }
+
+  // An error returned by Jit.main() terminates the process with this banner;
+  // otherwise its value becomes the exit code.
+  ExitOnError ExitOnErr;
+  ExitOnErr.setBanner("ThinLtoJIT: ");
+
+  return ExitOnErr(Jit.main(InputArgs));
+}