Index: llvm/include/llvm/ProfileData/SampleProf.h =================================================================== --- llvm/include/llvm/ProfileData/SampleProf.h +++ llvm/include/llvm/ProfileData/SampleProf.h @@ -72,6 +72,10 @@ return Accumulator; } +namespace afdo_detail { +template struct IRTraits {}; +} // namespace afdo_detail + } // end namespace llvm namespace std { Index: llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h =================================================================== --- llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -41,63 +41,104 @@ #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h" namespace llvm { -using namespace llvm; using namespace sampleprof; using namespace sampleprofutil; using ProfileCount = Function::ProfileCount; #define DEBUG_TYPE "sample-profile-impl" -using BlockWeightMap = DenseMap; -using EquivalenceClassMap = DenseMap; -using Edge = std::pair; -using EdgeWeightMap = DenseMap; -using BlockEdgeMap = - DenseMap>; +namespace afdo_detail { + +template <> struct IRTraits { + using InstructionT = Instruction; + using BasicBlockT = BasicBlock; + using FunctionT = Function; + using BlockFrequencyInfoT = BlockFrequencyInfo; + using LoopT = Loop; + using LoopInfoT = LoopInfo; + using OptRemarkEmitterT = OptimizationRemarkEmitter; + using OptRemarkAnalysisT = OptimizationRemarkAnalysis; + using DominatorTreeT = DominatorTree; + using PostDominatorTreeT = PostDominatorTree; + static Function &getFunction(Function &F) { return F; } + static const BasicBlock *getEntryBB(const Function *F) { + return &F->getEntryBlock(); + } +}; + +} // end namespace afdo_detail extern cl::opt SampleProfileMaxPropagateIterations; extern cl::opt SampleProfileRecordCoverage; extern cl::opt SampleProfileSampleCoverage; extern cl::opt NoWarnSampleUnused; -class SampleProfileLoaderBaseImpl { +template class SampleProfileLoaderBaseImpl { public: SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {} void dump() { Reader->dump(); } + using InstructionT = typename afdo_detail::IRTraits::InstructionT; + using BasicBlockT = typename afdo_detail::IRTraits::BasicBlockT; + using BlockFrequencyInfoT = + typename afdo_detail::IRTraits::BlockFrequencyInfoT; + using FunctionT = typename afdo_detail::IRTraits::FunctionT; + using LoopT = typename afdo_detail::IRTraits::LoopT; + using LoopInfoT = typename afdo_detail::IRTraits::LoopInfoT; + using OptRemarkEmitterT = + typename afdo_detail::IRTraits::OptRemarkEmitterT; + using OptRemarkAnalysisT = + typename afdo_detail::IRTraits::OptRemarkAnalysisT; + using DominatorTreeT = typename afdo_detail::IRTraits::DominatorTreeT; + using PostDominatorTreeT = + typename afdo_detail::IRTraits::PostDominatorTreeT; + + using BlockWeightMap = DenseMap; + using EquivalenceClassMap = + DenseMap; + using Edge = std::pair; + using EdgeWeightMap = DenseMap; + using BlockEdgeMap = + DenseMap>; + protected: ~SampleProfileLoaderBaseImpl() = default; friend class SampleCoverageTracker; - inline unsigned getFunctionLoc(Function &F); - inline virtual ErrorOr getInstWeight(const Instruction &Inst); - inline ErrorOr getInstWeightImpl(const Instruction &Inst); - inline ErrorOr getBlockWeight(const BasicBlock *BB); + Function &getFunction(FunctionT &F) { + return afdo_detail::IRTraits::getFunction(F); + } + const BasicBlockT *getEntryBB(const FunctionT *F) { + return afdo_detail::IRTraits::getEntryBB(F); + } + + unsigned getFunctionLoc(FunctionT &Func); + virtual ErrorOr getInstWeight(const InstructionT &Inst); + ErrorOr getInstWeightImpl(const InstructionT &Inst); + ErrorOr getBlockWeight(const BasicBlockT *BB); mutable DenseMap DILocation2SampleMap; - inline virtual const FunctionSamples * - findFunctionSamples(const Instruction &I) const; - inline void printEdgeWeight(raw_ostream &OS, Edge E); - inline void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; - inline void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); - inline bool computeBlockWeights(Function &F); - inline void findEquivalenceClasses(Function &F); - template - inline void - findEquivalencesFor(BasicBlock *BB1, ArrayRef Descendants, - DominatorTreeBase *DomTree); - - inline void propagateWeights(Function &F); - inline uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, - Edge *UnknownEdge); - inline void buildEdges(Function &F); - inline bool propagateThroughEdges(Function &F, bool UpdateBlockCount); - inline void clearFunctionData(); - inline void computeDominanceAndLoopInfo(Function &F); - inline bool - computeAndPropagateWeights(Function &F, + virtual const FunctionSamples * + findFunctionSamples(const InstructionT &I) const; + void printEdgeWeight(raw_ostream &OS, Edge E); + void printBlockWeight(raw_ostream &OS, const BasicBlockT *BB) const; + void printBlockEquivalence(raw_ostream &OS, const BasicBlockT *BB); + bool computeBlockWeights(FunctionT &F); + void findEquivalenceClasses(FunctionT &F); + void findEquivalencesFor(BasicBlockT *BB1, + ArrayRef Descendants, + PostDominatorTreeT *DomTree); + + void propagateWeights(FunctionT &F); + uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge); + void buildEdges(FunctionT &F); + bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount); + void clearFunctionData(); + void computeDominanceAndLoopInfo(FunctionT &F); + bool + computeAndPropagateWeights(FunctionT &F, const DenseSet &InlinedGUIDs); - inline void emitCoverageRemarks(Function &F); + void emitCoverageRemarks(FunctionT &F); /// Map basic blocks to their computed weights. /// @@ -112,7 +153,7 @@ EdgeWeightMap EdgeWeights; /// Set of visited blocks during propagation. - SmallPtrSet VisitedBlocks; + SmallPtrSet VisitedBlocks; /// Set of visited edges during propagation. SmallSet VisitedEdges; @@ -126,9 +167,9 @@ EquivalenceClassMap EquivalenceClass; /// Dominance, post-dominance and loop information. - std::unique_ptr DT; - std::unique_ptr PDT; - std::unique_ptr LI; + std::unique_ptr DT; + std::unique_ptr PDT; + std::unique_ptr LI; /// Predecessors for each basic block in the CFG. BlockEdgeMap Predecessors; @@ -152,11 +193,12 @@ ProfileSummaryInfo *PSI = nullptr; /// Optimization Remark Emitter used to emit diagnostic remarks. - OptimizationRemarkEmitter *ORE = nullptr; + OptRemarkEmitterT *ORE = nullptr; }; /// Clear all the per-function data used to load samples and propagate weights. -void SampleProfileLoaderBaseImpl::clearFunctionData() { +template +void SampleProfileLoaderBaseImpl::clearFunctionData() { BlockWeights.clear(); EdgeWeights.clear(); VisitedBlocks.clear(); @@ -175,7 +217,8 @@ /// /// \param OS Stream to emit the output to. /// \param E Edge to print. -void SampleProfileLoaderBaseImpl::printEdgeWeight(raw_ostream &OS, Edge E) { +template +void SampleProfileLoaderBaseImpl::printEdgeWeight(raw_ostream &OS, Edge E) { OS << "weight[" << E.first->getName() << "->" << E.second->getName() << "]: " << EdgeWeights[E] << "\n"; } @@ -184,9 +227,10 @@ /// /// \param OS Stream to emit the output to. /// \param BB Block to print. -void SampleProfileLoaderBaseImpl::printBlockEquivalence(raw_ostream &OS, - const BasicBlock *BB) { - const BasicBlock *Equiv = EquivalenceClass[BB]; +template +void SampleProfileLoaderBaseImpl::printBlockEquivalence( + raw_ostream &OS, const BasicBlockT *BB) { + const BasicBlockT *Equiv = EquivalenceClass[BB]; OS << "equivalence[" << BB->getName() << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n"; } @@ -195,8 +239,9 @@ /// /// \param OS Stream to emit the output to. /// \param BB Block to print. -void SampleProfileLoaderBaseImpl::printBlockWeight(raw_ostream &OS, - const BasicBlock *BB) const { +template +void SampleProfileLoaderBaseImpl::printBlockWeight( + raw_ostream &OS, const BasicBlockT *BB) const { const auto &I = BlockWeights.find(BB); uint64_t W = (I == BlockWeights.end() ? 0 : I->second); OS << "weight[" << BB->getName() << "]: " << W << "\n"; @@ -214,13 +259,15 @@ /// \param Inst Instruction to query. /// /// \returns the weight of \p Inst. +template ErrorOr -SampleProfileLoaderBaseImpl::getInstWeight(const Instruction &Inst) { +SampleProfileLoaderBaseImpl::getInstWeight(const InstructionT &Inst) { return getInstWeightImpl(Inst); } +template ErrorOr -SampleProfileLoaderBaseImpl::getInstWeightImpl(const Instruction &Inst) { +SampleProfileLoaderBaseImpl::getInstWeightImpl(const InstructionT &Inst) { const FunctionSamples *FS = findFunctionSamples(Inst); if (!FS) return std::error_code(); @@ -238,7 +285,7 @@ CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get()); if (FirstMark) { ORE->emit([&]() { - OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst); + OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst); Remark << "Applied " << ore::NV("NumSamples", *R); Remark << " samples from profile (offset: "; Remark << ore::NV("LineOffset", LineOffset); @@ -267,11 +314,12 @@ /// \param BB The basic block to query. /// /// \returns the weight for \p BB. +template ErrorOr -SampleProfileLoaderBaseImpl::getBlockWeight(const BasicBlock *BB) { +SampleProfileLoaderBaseImpl::getBlockWeight(const BasicBlockT *BB) { uint64_t Max = 0; bool HasWeight = false; - for (auto &I : BB->getInstList()) { + for (auto &I : *BB) { const ErrorOr &R = getInstWeight(I); if (R) { Max = std::max(Max, R.get()); @@ -287,7 +335,8 @@ /// the weights of every basic block in the CFG. /// /// \param F The function to query. -bool SampleProfileLoaderBaseImpl::computeBlockWeights(Function &F) { +template +bool SampleProfileLoaderBaseImpl::computeBlockWeights(FunctionT &F) { bool Changed = false; LLVM_DEBUG(dbgs() << "Block weights\n"); for (const auto &BB : F) { @@ -312,8 +361,9 @@ /// \param Inst Instruction to query. /// /// \returns the FunctionSamples pointer to the inlined instance. -const FunctionSamples *SampleProfileLoaderBaseImpl::findFunctionSamples( - const Instruction &Inst) const { +template +const FunctionSamples *SampleProfileLoaderBaseImpl::findFunctionSamples( + const InstructionT &Inst) const { const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) return Samples; @@ -348,11 +398,11 @@ /// \param DomTree Opposite dominator tree. If \p Descendants is filled /// with blocks from \p BB1's dominator tree, then /// this is the post-dominator tree, and vice versa. -template -void SampleProfileLoaderBaseImpl::findEquivalencesFor( - BasicBlock *BB1, ArrayRef Descendants, - DominatorTreeBase *DomTree) { - const BasicBlock *EC = EquivalenceClass[BB1]; +template +void SampleProfileLoaderBaseImpl::findEquivalencesFor( + BasicBlockT *BB1, ArrayRef Descendants, + PostDominatorTreeT *DomTree) { + const BasicBlockT *EC = EquivalenceClass[BB1]; uint64_t Weight = BlockWeights[EC]; for (const auto *BB2 : Descendants) { bool IsDomParent = DomTree->dominates(BB2, BB1); @@ -375,7 +425,8 @@ Weight = std::max(Weight, BlockWeights[BB2]); } } - if (EC == &EC->getParent()->getEntryBlock()) { + const BasicBlockT *EntryBB = getEntryBB(EC->getParent()); + if (EC == EntryBB) { BlockWeights[EC] = Samples->getHeadSamples() + 1; } else { BlockWeights[EC] = Weight; @@ -391,12 +442,13 @@ /// dominates B2, B2 post-dominates B1 and both are in the same loop. /// /// \param F The function to query. -void SampleProfileLoaderBaseImpl::findEquivalenceClasses(Function &F) { - SmallVector DominatedBBs; +template +void SampleProfileLoaderBaseImpl::findEquivalenceClasses(FunctionT &F) { + SmallVector DominatedBBs; LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n"); // Find equivalence sets based on dominance and post-dominance information. for (auto &BB : F) { - BasicBlock *BB1 = &BB; + BasicBlockT *BB1 = &BB; // Compute BB1's equivalence class once. if (EquivalenceClass.count(BB1)) { @@ -433,8 +485,8 @@ LLVM_DEBUG( dbgs() << "\nAssign the same weight to all blocks in the same class\n"); for (auto &BI : F) { - const BasicBlock *BB = &BI; - const BasicBlock *EquivBB = EquivalenceClass[BB]; + const BasicBlockT *BB = &BI; + const BasicBlockT *EquivBB = EquivalenceClass[BB]; if (BB != EquivBB) BlockWeights[BB] = BlockWeights[EquivBB]; LLVM_DEBUG(printBlockWeight(dbgs(), BB)); @@ -451,9 +503,10 @@ /// \param UnknownEdge Set if E has not been visited before. /// /// \returns E's weight, if known. Otherwise, return 0. -uint64_t SampleProfileLoaderBaseImpl::visitEdge(Edge E, - unsigned *NumUnknownEdges, - Edge *UnknownEdge) { +template +uint64_t SampleProfileLoaderBaseImpl::visitEdge(Edge E, + unsigned *NumUnknownEdges, + Edge *UnknownEdge) { if (!VisitedEdges.count(E)) { (*NumUnknownEdges)++; *UnknownEdge = E; @@ -476,13 +529,14 @@ /// has already been annotated. /// /// \returns True if new weights were assigned to edges or blocks. -bool SampleProfileLoaderBaseImpl::propagateThroughEdges(Function &F, - bool UpdateBlockCount) { +template +bool SampleProfileLoaderBaseImpl::propagateThroughEdges( + FunctionT &F, bool UpdateBlockCount) { bool Changed = false; LLVM_DEBUG(dbgs() << "\nPropagation through edges\n"); for (const auto &BI : F) { - const BasicBlock *BB = &BI; - const BasicBlock *EC = EquivalenceClass[BB]; + const BasicBlockT *BB = &BI; + const BasicBlockT *EC = EquivalenceClass[BB]; // Visit all the predecessor and successor edges to determine // which ones have a weight assigned already. Note that it doesn't @@ -569,7 +623,7 @@ EdgeWeights[UnknownEdge] = BBWeight - TotalWeight; else EdgeWeights[UnknownEdge] = 0; - const BasicBlock *OtherEC; + const BasicBlockT *OtherEC; if (i == 0) OtherEC = EquivalenceClass[UnknownEdge.first]; else @@ -625,15 +679,16 @@ /// /// We are interested in unique edges. If a block B1 has multiple /// edges to another block B2, we only add a single B1->B2 edge. -void SampleProfileLoaderBaseImpl::buildEdges(Function &F) { +template +void SampleProfileLoaderBaseImpl::buildEdges(FunctionT &F) { for (auto &BI : F) { - BasicBlock *B1 = &BI; + BasicBlockT *B1 = &BI; // Add predecessors for B1. - SmallPtrSet Visited; + SmallPtrSet Visited; if (!Predecessors[B1].empty()) llvm_unreachable("Found a stale predecessors list in a basic block."); - for (BasicBlock *B2 : predecessors(B1)) + for (BasicBlockT *B2 : predecessors(B1)) if (Visited.insert(B2).second) Predecessors[B1].push_back(B2); @@ -641,7 +696,7 @@ Visited.clear(); if (!Successors[B1].empty()) llvm_unreachable("Found a stale successors list in a basic block."); - for (BasicBlock *B2 : successors(B1)) + for (BasicBlockT *B2 : successors(B1)) if (Visited.insert(B2).second) Successors[B1].push_back(B2); } @@ -664,19 +719,20 @@ /// known, the weight for that edge is set to the weight of the block /// minus the weight of the other incoming edges to that block (if /// known). -void SampleProfileLoaderBaseImpl::propagateWeights(Function &F) { +template +void SampleProfileLoaderBaseImpl::propagateWeights(FunctionT &F) { bool Changed = true; unsigned I = 0; // If BB weight is larger than its corresponding loop's header BB weight, // use the BB weight to replace the loop header BB weight. for (auto &BI : F) { - BasicBlock *BB = &BI; - Loop *L = LI->getLoopFor(BB); + BasicBlockT *BB = &BI; + LoopT *L = LI->getLoopFor(BB); if (!L) { continue; } - BasicBlock *Header = L->getHeader(); + BasicBlockT *Header = L->getHeader(); if (Header && BlockWeights[BB] > BlockWeights[Header]) { BlockWeights[Header] = BlockWeights[BB]; } @@ -757,8 +813,9 @@ /// \param F The function to query. /// /// \returns true if \p F was modified. Returns false, otherwise. -bool SampleProfileLoaderBaseImpl::computeAndPropagateWeights( - Function &F, const DenseSet &InlinedGUIDs) { +template +bool SampleProfileLoaderBaseImpl::computeAndPropagateWeights( + FunctionT &F, const DenseSet &InlinedGUIDs) { bool Changed = (InlinedGUIDs.size() != 0); // Compute basic block weights. @@ -770,7 +827,7 @@ // Sets the GUIDs that are inlined in the profiled binary. This is used // for ThinLink to make correct liveness analysis, and also make the IR // match the profiled binary before annotation. - F.setEntryCount( + getFunction(F).setEntryCount( ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real), &InlinedGUIDs); @@ -787,15 +844,17 @@ return Changed; } -void SampleProfileLoaderBaseImpl::emitCoverageRemarks(Function &F) { +template +void SampleProfileLoaderBaseImpl::emitCoverageRemarks(FunctionT &F) { // If coverage checking was requested, compute it now. + const Function &Func = getFunction(F); if (SampleProfileRecordCoverage) { unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI); unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI); unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); if (Coverage < SampleProfileRecordCoverage) { - F.getContext().diagnose(DiagnosticInfoSampleProfile( - F.getSubprogram()->getFilename(), getFunctionLoc(F), + Func.getContext().diagnose(DiagnosticInfoSampleProfile( + Func.getSubprogram()->getFilename(), getFunctionLoc(F), Twine(Used) + " of " + Twine(Total) + " available profile records (" + Twine(Coverage) + "%) were applied", DS_Warning)); @@ -807,8 +866,8 @@ uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI); unsigned Coverage = CoverageTracker.computeCoverage(Used, Total); if (Coverage < SampleProfileSampleCoverage) { - F.getContext().diagnose(DiagnosticInfoSampleProfile( - F.getSubprogram()->getFilename(), getFunctionLoc(F), + Func.getContext().diagnose(DiagnosticInfoSampleProfile( + Func.getSubprogram()->getFilename(), getFunctionLoc(F), Twine(Used) + " of " + Twine(Total) + " available profile samples (" + Twine(Coverage) + "%) were applied", DS_Warning)); @@ -827,7 +886,9 @@ /// /// \returns the line number where \p F is defined. If it returns 0, /// it means that there is no debug information available for \p F. -unsigned SampleProfileLoaderBaseImpl::getFunctionLoc(Function &F) { +template +unsigned SampleProfileLoaderBaseImpl::getFunctionLoc(FunctionT &Func) { + const Function &F = getFunction(Func); if (DISubprogram *S = F.getSubprogram()) return S->getLine(); @@ -843,13 +904,15 @@ return 0; } -void SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo(Function &F) { - DT.reset(new DominatorTree); +template +void SampleProfileLoaderBaseImpl::computeDominanceAndLoopInfo( + FunctionT &F) { + DT.reset(new DominatorTreeT); DT->recalculate(F); PDT.reset(new PostDominatorTree(F)); - LI.reset(new LoopInfo); + LI.reset(new LoopInfoT); LI->analyze(*DT); } Index: llvm/lib/Transforms/IPO/SampleProfile.cpp =================================================================== --- llvm/lib/Transforms/IPO/SampleProfile.cpp +++ llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -332,7 +332,8 @@ /// This pass reads profile data from the file specified by /// -sample-profile-file and annotates every affected function with the /// profile information found in that file. -class SampleProfileLoader final : public SampleProfileLoaderBaseImpl { +class SampleProfileLoader final + : public SampleProfileLoaderBaseImpl { public: SampleProfileLoader( StringRef Name, StringRef RemapName, ThinOrFullLTOPhase LTOPhase,