diff --git a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp --- a/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp +++ b/llvm/examples/SpeculativeJIT/SpeculativeJIT.cpp @@ -119,10 +119,7 @@ auto Work = [SharedMU, &JD]() { SharedMU->doMaterialize(JD); }; CompileThreads.async(std::move(Work)); }); - JITEvaluatedSymbol SpeculatorSymbol(JITTargetAddress(&S), - JITSymbolFlags::Exported); - ExitOnErr(this->ES->getMainJITDylib().define( - absoluteSymbols({{Mangle("__orc_speculator"), SpeculatorSymbol}}))); + ExitOnErr(S.addSpeculationRuntime(this->ES->getMainJITDylib(), Mangle)); LocalCXXRuntimeOverrides CXXRuntimeoverrides; ExitOnErr(CXXRuntimeoverrides.enable(this->ES->getMainJITDylib(), Mangle)); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h b/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h --- a/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/SpeculateAnalyses.h @@ -7,12 +7,13 @@ //===----------------------------------------------------------------------===// // \file /// Contains the Analyses and Result Interpretation to select likely functions -/// to Speculatively compile before they are called. [Experimentation] +/// to Speculatively compile before they are called. [Purely Experimentation] //===----------------------------------------------------------------------===// #ifndef LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H #define LLVM_EXECUTIONENGINE_ORC_SPECULATEANALYSES_H +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/ExecutionEngine/Orc/Core.h" #include "llvm/ExecutionEngine/Orc/Speculation.h" @@ -22,17 +23,65 @@ namespace orc { -// Direct calls in high frequency basic blocks are extracted. -class BlockFreqQuery { -private: +// Provides interface to Queries. +class SpeculateQuery { +protected: void findCalles(const BasicBlock *, DenseSet &); - size_t numBBToGet(size_t); + bool isStraightLine(const Function &F); public: using ResultTy = Optional>>; + virtual ResultTy operator()(Function &) = 0; + virtual ~SpeculateQuery() = default; +}; +// Direct calls in high frequency basic blocks are extracted. +class BlockFreqQuery : public SpeculateQuery { +private: + size_t numBBToGet(size_t); +public: // Find likely next executables based on IR Block Frequency - ResultTy operator()(Function &F, FunctionAnalysisManager &FAM); + ResultTy operator()(Function &F) override; +}; + +// This Query generates a sequence of basic blocks which follows the order of +// execution. +// A handful of BB with higher block frequencies are taken, then path to entry +// and end BB are discovered by traversing up & down the CFG. + +class SequenceBBQuery : public SpeculateQuery { +private: + struct WalkDirection { + public: + bool Upward, Downward; + // the block associated contain a call + bool CallerBlock; + WalkDirection() : Upward(true), Downward(true), CallerBlock(false) {} + }; + +public: + using VisitedBlocksInfoTy = DenseMap; + using BlockListTy = SmallVector; + using BackEdgesInfoTy = + SmallVector, 8>; + using BlockFreqInfoTy = + SmallVector, 8>; + +private: + std::size_t getHottestBlocks(std::size_t TotalBlocks); + BlockListTy rearrangeBB(const Function &, const BlockListTy &); + BlockListTy queryCFG(Function &, const BlockListTy &); + void traverseToEntryBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + void traverseToExitBlock(const BasicBlock *, const BlockListTy &, + const BackEdgesInfoTy &, + const BranchProbabilityInfo *, + VisitedBlocksInfoTy &); + +public: + ResultTy operator()(Function &F) override; }; } // namespace orc diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -81,9 +81,7 @@ { std::lock_guard Lockit(ConcurrentAccess); auto It = GlobalSpecMap.find(FAddr); - // Kill this when jump on first call instrumentation is in place; - auto Iv = AlreadyExecuted.insert(FAddr); - if (It == GlobalSpecMap.end() || Iv.second == false) + if (It == GlobalSpecMap.end()) return; else CandidateSet = It->getSecond(); @@ -113,7 +111,12 @@ Speculator(Speculator &&) = delete; Speculator &operator=(const Speculator &) = delete; Speculator &operator=(Speculator &&) = delete; - ~Speculator() {} + ~Speculator() = default; + + /// Define symbols for this Speculator object (__orc_speculator) and the + /// speculation runtime entry point symbol (__orc_speculate_for) in the + /// given JITDylib. + Error addSpeculationRuntime(JITDylib &JD, MangleAndInterner &Mangle); // Speculatively compile likely functions for the given Stub Address. // destination of __orc_speculate_for jump @@ -142,33 +145,22 @@ ExecutionSession &getES() { return ES; } private: + static void speculateForEntryPoint(Speculator *Ptr, uint64_t StubId); std::mutex ConcurrentAccess; ImplSymbolMap &AliaseeImplTable; ExecutionSession &ES; - DenseSet AlreadyExecuted; StubAddrLikelies GlobalSpecMap; }; -// replace DenseMap with Pair + class IRSpeculationLayer : public IRLayer { public: using IRlikiesStrRef = Optional>>; - using ResultEval = - std::function; + using ResultEval = std::function; using TargetAndLikelies = DenseMap; IRSpeculationLayer(ExecutionSession &ES, IRCompileLayer &BaseLayer, Speculator &Spec, ResultEval Interpreter) : IRLayer(ES), NextLayer(BaseLayer), S(Spec), QueryAnalysis(Interpreter) { - PB.registerFunctionAnalyses(FAM); - } - - template < - typename AnalysisTy, - typename std::enable_if< - std::is_base_of, AnalysisTy>::value, - bool>::type = true> - void registerAnalysis() { - FAM.registerPass([]() { return AnalysisTy(); }); } void emit(MaterializationResponsibility R, ThreadSafeModule TSM); @@ -192,16 +184,9 @@ IRCompileLayer &NextLayer; Speculator &S; - PassBuilder PB; - FunctionAnalysisManager FAM; ResultEval QueryAnalysis; }; -// Runtime Function Interface -extern "C" { -void __orc_speculate_for(Speculator *, uint64_t stub_id); -} - } // namespace orc } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt --- a/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt +++ b/llvm/lib/ExecutionEngine/Orc/CMakeLists.txt @@ -35,4 +35,5 @@ LLVMAnalysis LLVMBitReader LLVMBitWriter + LLVMPasses ) diff --git a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp --- a/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp +++ b/llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp @@ -9,14 +9,22 @@ #include "llvm/ExecutionEngine/Orc/SpeculateAnalyses.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Passes/PassBuilder.h" +#include "llvm/Support/ErrorHandling.h" + +#include namespace { using namespace llvm; -std::vector findBBwithCalls(const Function &F, - bool IndirectCall = false) { - std::vector BBs; +SmallVector findBBwithCalls(const Function &F, + bool IndirectCall = false) { + SmallVector BBs; auto findCallInst = [&IndirectCall](const Instruction &I) { if (auto Call = dyn_cast(&I)) { @@ -38,11 +46,12 @@ // Implementations of Queries shouldn't need to lock the resources // such as LLVMContext, each argument (function) has a non-shared LLVMContext +// Plus, if Queries contain states necessary locking scheme should be provided. namespace llvm { namespace orc { // Collect direct calls only -void BlockFreqQuery::findCalles(const BasicBlock *BB, +void SpeculateQuery::findCalles(const BasicBlock *BB, DenseSet &CallesNames) { assert(BB != nullptr && "Traversing Null BB to find calls?"); @@ -59,7 +68,16 @@ getCalledFunction(II); } -// blind calculation +bool SpeculateQuery::isStraightLine(const Function &F) { + if (F.getBasicBlockList().size() == 1) + return true; + return llvm::all_of(F.getBasicBlockList(), [](const BasicBlock &BB) -> bool { + return BB.getSingleSuccessor() ? true : false; + }); +} + +// BlockFreqQuery Implementations + size_t BlockFreqQuery::numBBToGet(size_t numBB) { // small CFG if (numBB < 4) @@ -71,12 +89,15 @@ return (numBB / 2) + (numBB / 4); } -BlockFreqQuery::ResultTy BlockFreqQuery:: -operator()(Function &F, FunctionAnalysisManager &FAM) { +BlockFreqQuery::ResultTy BlockFreqQuery::operator()(Function &F) { DenseMap> CallerAndCalles; DenseSet Calles; SmallVector, 8> BBFreqs; + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + auto IBBs = findBBwithCalls(F); if (IBBs.empty()) @@ -107,5 +128,192 @@ return CallerAndCalles; } + +// SequenceBBQuery Implementation +std::size_t SequenceBBQuery::getHottestBlocks(std::size_t TotalBlocks) { + if (TotalBlocks == 1) + return TotalBlocks; + else + return TotalBlocks / 2; +} + +// FIXME : find good implementation. +SequenceBBQuery::BlockListTy +SequenceBBQuery::rearrangeBB(const Function &F, const BlockListTy &BBList) { + BlockListTy RearrangedBBSet; + auto &AllBlocks = F.getBasicBlockList(); + for (auto &Block : AllBlocks) { + auto FItr = std::find(BBList.begin(), BBList.end(), &Block); + // found it + if (FItr != BBList.end()) + RearrangedBBSet.push_back(*FItr); + } + assert(RearrangedBBSet.size() == BBList.size() && + "BasicBlock missing while rearranging?"); + return RearrangedBBSet; +} + +void SequenceBBQuery::traverseToEntryBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) // already visited. + if (!Itr->second.Upward) + return; + else + Itr->second.Upward = false; + else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Upward = false; + // FIXME: Expensive Check + auto IsCallerBlock = llvm::find(CallerBlocks, AtBB); + if (IsCallerBlock != CallerBlocks.end()) { + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } else + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + const_pred_iterator PIt = pred_begin(AtBB), EIt = pred_end(AtBB); + // Move this check to top, when we have code setup to launch speculative + // compiles for function in entry BB, this triggers the speculative compiles + // before running the program. + if (PIt == EIt) // No Preds. + return; + + DenseSet PredSkipNodes; + for (auto I = BackEdgesInfo.begin(), E = BackEdgesInfo.end(); I != E; ++I) + // Since we are checking for predecessor's backedges, this Block + // occurs in second position. + if (I->second == AtBB) + PredSkipNodes.insert(I->first); + + // Skip predecessors which source of back-edges. + for (; PIt != EIt; ++PIt) + // checking EdgeHotness is cheaper + if (BPI->isEdgeHot(*PIt, AtBB) && !PredSkipNodes.count(*PIt)) + traverseToEntryBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +void SequenceBBQuery::traverseToExitBlock(const BasicBlock *AtBB, + const BlockListTy &CallerBlocks, + const BackEdgesInfoTy &BackEdgesInfo, + const BranchProbabilityInfo *BPI, + VisitedBlocksInfoTy &VisitedBlocks) { + auto Itr = VisitedBlocks.find(AtBB); + if (Itr != VisitedBlocks.end()) // already visited. + if (!Itr->second.Downward) + return; + else + Itr->second.Downward = false; + else { + // Create hint for newly discoverd blocks. + WalkDirection BlockHint; + BlockHint.Downward = false; + // FIXME: Expensive Check + auto IsCallerBlock = llvm::find(CallerBlocks, AtBB); + + if (IsCallerBlock != CallerBlocks.end()) { + BlockHint.CallerBlock = true; + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } else + VisitedBlocks.insert(std::make_pair(AtBB, BlockHint)); + } + + succ_const_iterator PIt = succ_begin(AtBB), EIt = succ_end(AtBB); + if (PIt == EIt) // No succs. + return; + + // If there are hot edges, then compute SuccSkipNodes. + DenseSet SuccSkipNodes; + // Since we are checking for successor's backedges, this Block + // occurs in first position. + for (auto I = BackEdgesInfo.begin(), E = BackEdgesInfo.end(); I != E; ++I) + if (I->first == AtBB) + SuccSkipNodes.insert(I->second); + + for (; PIt != EIt; ++PIt) + if (BPI->isEdgeHot(AtBB, *PIt) && !SuccSkipNodes.count(*PIt)) + traverseToExitBlock(*PIt, CallerBlocks, BackEdgesInfo, BPI, + VisitedBlocks); +} + +// Get Block frequencies for blocks and take most frquently executed block, +// walk towards the entry block from those blocks and discover the basic blocks +// with call. +SequenceBBQuery::BlockListTy +SequenceBBQuery::queryCFG(Function &F, const BlockListTy &CallerBlocks) { + + BlockFreqInfoTy BBFreqs; + VisitedBlocksInfoTy VisitedBlocks; + BackEdgesInfoTy BackEdgesInfo; + + PassBuilder PB; + FunctionAnalysisManager FAM; + PB.registerFunctionAnalyses(FAM); + + auto &BFI = FAM.getResult(F); + + llvm::FindFunctionBackedges(F, BackEdgesInfo); + + for (const auto I : CallerBlocks) + BBFreqs.push_back({I, BFI.getBlockFreq(I).getFrequency()}); + + llvm::sort(BBFreqs.begin(), BBFreqs.end(), + [](decltype(BBFreqs)::const_reference BBF, + decltype(BBFreqs)::const_reference BBS) { + return BBF.second > BBS.second ? true : false; + }); + + auto NHotBlocks = getHottestBlocks(BBFreqs.size()); + + BranchProbabilityInfo *BPI = + FAM.getCachedResult(F); + + // visit NHotBlocks, + // traverse upwards to entry + // traverse downwards to end. + + for (size_t i = 0; i < NHotBlocks; ++i) { + const BasicBlock *BB = BBFreqs[i].first; + traverseToEntryBlock(BB, CallerBlocks, BackEdgesInfo, BPI, VisitedBlocks); + traverseToExitBlock(BB, CallerBlocks, BackEdgesInfo, BPI, VisitedBlocks); + } + + BlockListTy MinCallerBlocks; + for (auto &I : VisitedBlocks) + if (I.second.CallerBlock) + MinCallerBlocks.push_back(std::move(I.first)); + + return rearrangeBB(F, MinCallerBlocks); +} + +SpeculateQuery::ResultTy SequenceBBQuery::operator()(Function &F) { + // reduce the number of lists! + DenseMap> CallerAndCalles; + DenseSet Calles; + BlockListTy SequencedBlocks; + BlockListTy CallerBlocks; + + CallerBlocks = findBBwithCalls(F); + if (CallerBlocks.empty()) + return None; + + if (isStraightLine(F)) + SequencedBlocks = rearrangeBB(F, CallerBlocks); + else + SequencedBlocks = queryCFG(F, CallerBlocks); + + for (auto BB : SequencedBlocks) + findCalles(BB, Calles); + + CallerAndCalles.insert({F.getName(), std::move(Calles)}); + return CallerAndCalles; +} + } // namespace orc } // namespace llvm diff --git a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp --- a/llvm/lib/ExecutionEngine/Orc/Speculation.cpp +++ b/llvm/lib/ExecutionEngine/Orc/Speculation.cpp @@ -36,11 +36,30 @@ } } +// Trigger Speculative Compiles. +void Speculator::speculateForEntryPoint(Speculator *Ptr, uint64_t StubId) { + assert(Ptr && " Null Address Received in orc_speculate_for "); + Ptr->speculateFor(StubId); +} + +Error Speculator::addSpeculationRuntime(JITDylib &JD, + MangleAndInterner &Mangle) { + JITEvaluatedSymbol ThisPtr(pointerToJITTargetAddress(this), + JITSymbolFlags::Exported); + JITEvaluatedSymbol SpeculateForEntryPtr( + pointerToJITTargetAddress(&speculateForEntryPoint), + JITSymbolFlags::Exported); + return JD.define(absoluteSymbols({ + {Mangle("__orc_speculator"), ThisPtr}, // Data Symbol + {Mangle("__orc_speculate_for"), SpeculateForEntryPtr} // Callable Symbol + })); +} + // If two modules, share the same LLVMContext, different threads must // not access those modules concurrently, doing so leave the // LLVMContext in in-consistent state. // But here since each TSM has a unique Context associated with it, -// on locking is necessary! +// locking is unnecessary! void IRSpeculationLayer::emit(MaterializationResponsibility R, ThreadSafeModule TSM) { @@ -67,14 +86,49 @@ // Simplify CFG helps the static branch prediction heuristics! for (auto &Fn : TSM.getModuleUnlocked()->getFunctionList()) { if (!Fn.isDeclaration()) { - auto IRNames = QueryAnalysis(Fn, FAM); + auto IRNames = QueryAnalysis(Fn); // Instrument and register if Query has result if (IRNames.hasValue()) { - Mutator.SetInsertPoint(&(Fn.getEntryBlock().front())); + + // Emit globals for each function. + auto LoadValueTy = Type::getInt8Ty(InContext); + auto SpeculatorGuard = + new GlobalVariable(*TSM.getModuleUnlocked(), LoadValueTy, false, + GlobalValue::LinkageTypes::InternalLinkage, + ConstantInt::get(LoadValueTy, 0), + "__orc_speculate.guard.for." + Fn.getName()); + SpeculatorGuard->setAlignment(1); + SpeculatorGuard->setUnnamedAddr(GlobalValue::UnnamedAddr::Local); + + BasicBlock &ProgramEntry = Fn.getEntryBlock(); + // Create BasicBlocks before the program's entry basicblock + BasicBlock *SpeculateBlock = BasicBlock::Create( + InContext, "__orc_speculate.block", &Fn, &ProgramEntry); + BasicBlock *SpeculateDecisionBlock = BasicBlock::Create( + InContext, "__orc_speculate.decision.block", &Fn, SpeculateBlock); + + assert(SpeculateDecisionBlock == &Fn.getEntryBlock() && + "SpeculateDecisionBlock not updated?"); + Mutator.SetInsertPoint(SpeculateDecisionBlock); + + auto LoadGuard = + Mutator.CreateLoad(LoadValueTy, SpeculatorGuard, "guard.value"); + // if just loaded value equal to 0,return true. + auto CanSpeculate = + Mutator.CreateICmpEQ(LoadGuard, ConstantInt::get(LoadValueTy, 0), + "compare.to.speculate"); + Mutator.CreateCondBr(CanSpeculate, SpeculateBlock, &ProgramEntry); + + Mutator.SetInsertPoint(SpeculateBlock); auto ImplAddrToUint = Mutator.CreatePtrToInt(&Fn, Type::getInt64Ty(InContext)); Mutator.CreateCall(RuntimeCallTy, RuntimeCall, {SpeclAddr, ImplAddrToUint}); + Mutator.CreateStore(ConstantInt::get(LoadValueTy, 1), SpeculatorGuard); + Mutator.CreateBr(&ProgramEntry); + + assert(Mutator.GetInsertBlock()->getParent() == &Fn && + "IR builder association mismatch?"); S.registerSymbols(internToJITSymbols(IRNames.getValue()), &R.getTargetJITDylib()); } @@ -87,11 +141,5 @@ NextLayer.emit(std::move(R), std::move(TSM)); } -// Runtime Function Implementation -extern "C" void __orc_speculate_for(Speculator *Ptr, uint64_t StubId) { - assert(Ptr && " Null Address Received in orc_speculate_for "); - Ptr->speculateFor(StubId); -} - } // namespace orc } // namespace llvm