diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -424,6 +424,9 @@ /// Return branch info corresponding to an edge going to \p Succ basic block. BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ); + /// Return branch info corresponding to an edge going to \p Succ basic block. + const BinaryBranchInfo &getBranchInfo(const BinaryBasicBlock &Succ) const; + /// Return branch info corresponding to an edge going to a basic block with /// label \p Label. BinaryBranchInfo &getBranchInfo(const MCSymbol *Label); diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -1599,7 +1599,7 @@ /// Print function information to the \p OS stream. void print(raw_ostream &OS, std::string Annotation = "", - bool PrintInstructions = true) const; + bool PrintInstructions = true); /// Print all relocations between \p Offset and \p Offset + \p Size in /// this function. diff --git a/bolt/include/bolt/Core/DynoStats.h b/bolt/include/bolt/Core/DynoStats.h --- a/bolt/include/bolt/Core/DynoStats.h +++ b/bolt/include/bolt/Core/DynoStats.h @@ -142,11 +142,11 @@ /// The function relies on branch instructions being in-sync with CFG for /// branch instructions stats. Thus it is better to call it after /// fixBranches(). -DynoStats getDynoStats(const BinaryFunction &BF); +DynoStats getDynoStats(BinaryFunction &BF); /// Return program-wide dynostats. template -inline DynoStats getDynoStats(const FuncsType &Funcs, bool IsAArch64) { +inline DynoStats getDynoStats(FuncsType &Funcs, bool IsAArch64) { DynoStats dynoStats(IsAArch64); for (auto &BFI : Funcs) { auto &BF = BFI.second; @@ -158,9 +158,8 @@ /// Call a function with optional before and after dynostats printing. template -inline void callWithDynoStats(FnType &&Func, const FuncsType &Funcs, - StringRef Phase, const bool Flag, - bool IsAArch64) { +inline void callWithDynoStats(FnType &&Func, FuncsType &Funcs, StringRef Phase, + const bool Flag, bool IsAArch64) { DynoStats DynoStatsBefore(IsAArch64); if (Flag) DynoStatsBefore = getDynoStats(Funcs, IsAArch64); diff --git a/bolt/include/bolt/Passes/ReorderAlgorithm.h b/bolt/include/bolt/Passes/ReorderAlgorithm.h --- a/bolt/include/bolt/Passes/ReorderAlgorithm.h +++ b/bolt/include/bolt/Passes/ReorderAlgorithm.h @@ -39,7 +39,7 @@ /// Clusters vector. Also encode relative weights between two clusters in /// the ClusterEdges vector if requested. This vector is indexed by /// the clusters indices in the Clusters vector. - virtual void clusterBasicBlocks(const BinaryFunction &BF, + virtual void clusterBasicBlocks(BinaryFunction &BF, bool ComputeEdges = false) = 0; /// Compute for each cluster its averagae execution frequency, that is @@ -98,7 +98,7 @@ BBToClusterMapTy BBToClusterMap; public: - void clusterBasicBlocks(const BinaryFunction &BF, + void clusterBasicBlocks(BinaryFunction &BF, bool ComputeEdges = false) override; void reset() override; }; @@ -167,7 +167,7 @@ /// Reorder the basic blocks of the given function and store the new order in /// the new Clusters vector. - virtual void reorderBasicBlocks(const BinaryFunction &BF, + virtual void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const = 0; void setClusterAlgorithm(ClusterAlgorithm *CAlgo) { @@ -185,7 +185,7 @@ /// only be used for small functions. class TSPReorderAlgorithm : public ReorderAlgorithm { public: - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; @@ -196,7 +196,7 @@ explicit OptimizeReorderAlgorithm(std::unique_ptr CAlgo) : ReorderAlgorithm(std::move(CAlgo)) {} - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; @@ -209,7 +209,7 @@ std::unique_ptr CAlgo) : ReorderAlgorithm(std::move(CAlgo)) {} - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; @@ -222,21 +222,21 @@ std::unique_ptr CAlgo) : ReorderAlgorithm(std::move(CAlgo)) {} - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; /// A new reordering algorithm for basic blocks, ext-tsp class ExtTSPReorderAlgorithm : public ReorderAlgorithm { public: - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; /// Toy example that simply reverses the original basic block order. class ReverseReorderAlgorithm : public ReorderAlgorithm { public: - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; @@ -247,7 +247,7 @@ std::unique_ptr CAlgo) : ReorderAlgorithm(std::move(CAlgo)) {} - void reorderBasicBlocks(const BinaryFunction &BF, + void reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const override; }; diff --git a/bolt/lib/Core/BinaryBasicBlock.cpp b/bolt/lib/Core/BinaryBasicBlock.cpp --- a/bolt/lib/Core/BinaryBasicBlock.cpp +++ b/bolt/lib/Core/BinaryBasicBlock.cpp @@ -581,15 +581,17 @@ BinaryBasicBlock::BinaryBranchInfo & BinaryBasicBlock::getBranchInfo(const BinaryBasicBlock &Succ) { - auto BI = branch_info_begin(); - for (BinaryBasicBlock *BB : successors()) { - if (&Succ == BB) - return *BI; - ++BI; - } + return const_cast( + static_cast(*this).getBranchInfo(Succ)); +} - llvm_unreachable("Invalid successor"); - return *BI; +const BinaryBasicBlock::BinaryBranchInfo & +BinaryBasicBlock::getBranchInfo(const BinaryBasicBlock &Succ) const { + const auto Zip = llvm::zip(successors(), branch_info()); + const auto Result = llvm::find_if( + Zip, [&](const auto &Tuple) { return std::get<0>(Tuple) == &Succ; }); + assert(Result != Zip.end() && "Cannot find target in successors"); + return std::get<1>(*Result); } BinaryBasicBlock::BinaryBranchInfo & diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -396,11 +396,20 @@ } void BinaryFunction::dump(bool PrintInstructions) const { - print(dbgs(), "", PrintInstructions); + // getDynoStats calls FunctionLayout::updateLayoutIndices and + // BasicBlock::analyzeBranch. The former cannot be const, but should be + // removed, the latter should be made const, but seems to require refactoring. + // Forcing all callers to have a non-const reference to BinaryFunction to call + // dump non-const however is not ideal either. Adding this const_cast is right + // now the best solution. It is safe, because BinaryFunction itself is not + // modified. Only BinaryBasicBlocks are actually modified (if it all) and we + // have mutable pointers to those regardless whether this function is + // const-qualified or not. + const_cast(*this).print(dbgs(), "", PrintInstructions); } void BinaryFunction::print(raw_ostream &OS, std::string Annotation, - bool PrintInstructions) const { + bool PrintInstructions) { if (!opts::shouldPrint(*this)) return; @@ -3558,9 +3567,9 @@ assert(hasCFG() && "function is expected to have CFG"); - BasicBlockListType Order; + SmallVector Order; if (UseDFS) - Order = dfs(); + llvm::copy(dfs(), std::back_inserter(Order)); else llvm::copy(Layout.blocks(), std::back_inserter(Order)); diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp --- a/bolt/lib/Core/DynoStats.cpp +++ b/bolt/lib/Core/DynoStats.cpp @@ -158,7 +158,7 @@ } } -DynoStats getDynoStats(const BinaryFunction &BF) { +DynoStats getDynoStats(BinaryFunction &BF) { auto &BC = BF.getBinaryContext(); DynoStats Stats(/*PrintAArch64Stats*/ BC.isAArch64()); diff --git a/bolt/lib/Passes/AsmDump.cpp b/bolt/lib/Passes/AsmDump.cpp --- a/bolt/lib/Passes/AsmDump.cpp +++ b/bolt/lib/Passes/AsmDump.cpp @@ -195,7 +195,7 @@ std::unordered_set CallReferences; MAP->OutStreamer->emitCFIStartProc(/*IsSimple=*/false); - for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { + for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) { OS << BB->getName() << ": \n"; const std::string BranchLabel = Twine(BB->getName(), "_br").str(); diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -1450,11 +1450,11 @@ if (!opts::PrintSortedBy.empty() && !llvm::is_contained(opts::PrintSortedBy, DynoStats::FIRST_DYNO_STAT)) { - std::vector Functions; + std::vector Functions; std::map Stats; - for (const auto &BFI : BC.getBinaryFunctions()) { - const BinaryFunction &BF = BFI.second; + for (auto &BFI : BC.getBinaryFunctions()) { + BinaryFunction &BF = BFI.second; if (shouldOptimize(BF) && BF.hasValidProfile()) { Functions.push_back(&BF); Stats.emplace(&BF, getDynoStats(BF)); @@ -1554,9 +1554,9 @@ // Collect and print information about suboptimal code layout on input. if (opts::ReportBadLayout) { - std::vector SuboptimalFuncs; + std::vector SuboptimalFuncs; for (auto &BFI : BC.getBinaryFunctions()) { - const BinaryFunction &BF = BFI.second; + BinaryFunction &BF = BFI.second; if (!BF.hasValidProfile()) continue; diff --git a/bolt/lib/Passes/CacheMetrics.cpp b/bolt/lib/Passes/CacheMetrics.cpp --- a/bolt/lib/Passes/CacheMetrics.cpp +++ b/bolt/lib/Passes/CacheMetrics.cpp @@ -117,9 +117,9 @@ for (BinaryFunction *SrcFunction : BinaryFunctions) { const BinaryContext &BC = SrcFunction->getBinaryContext(); - for (BinaryBasicBlock *BB : SrcFunction->getLayout().blocks()) { + for (const BinaryBasicBlock *BB : SrcFunction->getLayout().blocks()) { // Find call instructions and extract target symbols from each one - for (MCInst &Inst : *BB) { + for (const MCInst &Inst : *BB) { if (!BC.MIB->isCall(Inst)) continue; diff --git a/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp b/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp --- a/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp +++ b/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp @@ -441,7 +441,7 @@ } class ExtTSP { public: - ExtTSP(const BinaryFunction &BF) : BF(BF) { initialize(); } + ExtTSP(BinaryFunction &BF) : BF(BF) { initialize(); } /// Run the algorithm and return an ordering of basic block void run(BinaryFunction::BasicBlockOrderType &Order) { @@ -849,7 +849,7 @@ private: // The binary function - const BinaryFunction &BF; + BinaryFunction &BF; // All CFG nodes (basic blocks) std::vector AllBlocks; @@ -864,7 +864,7 @@ std::vector AllEdges; }; -void ExtTSPReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF, +void ExtTSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -12,10 +12,12 @@ #include "bolt/Passes/IdenticalCodeFolding.h" #include "bolt/Core/ParallelUtilities.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ThreadPool.h" #include "llvm/Support/Timer.h" #include +#include #include #include #include @@ -166,16 +168,15 @@ return false; // Process both functions in either DFS or existing order. - const BinaryFunction::BasicBlockOrderType OrderA = - opts::UseDFS - ? A.dfs() - : BinaryFunction::BasicBlockOrderType(A.getLayout().block_begin(), - A.getLayout().block_end()); - const BinaryFunction::BasicBlockOrderType OrderB = - opts::UseDFS - ? B.dfs() - : BinaryFunction::BasicBlockOrderType(B.getLayout().block_begin(), - B.getLayout().block_end()); + SmallVector OrderA; + SmallVector OrderB; + if (opts::UseDFS) { + copy(A.dfs(), std::back_inserter(OrderA)); + copy(B.dfs(), std::back_inserter(OrderB)); + } else { + copy(A.getLayout().blocks(), std::back_inserter(OrderA)); + copy(B.getLayout().blocks(), std::back_inserter(OrderB)); + } const BinaryContext &BC = A.getBinaryContext(); diff --git a/bolt/lib/Passes/ReorderAlgorithm.cpp b/bolt/lib/Passes/ReorderAlgorithm.cpp --- a/bolt/lib/Passes/ReorderAlgorithm.cpp +++ b/bolt/lib/Passes/ReorderAlgorithm.cpp @@ -75,7 +75,7 @@ for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) { double Freq = 0.0; uint64_t ClusterSize = 0; - for (BinaryBasicBlock *BB : Clusters[I]) { + for (const BinaryBasicBlock *BB : Clusters[I]) { if (BB->getNumNonPseudos() > 0) { Freq += BB->getExecutionCount(); // Estimate the size of a block in bytes at run time @@ -94,7 +94,7 @@ errs() << " (frequency: " << AvgFreq[I] << ")"; errs() << " : "; const char *Sep = ""; - for (BinaryBasicBlock *BB : Clusters[I]) { + for (const BinaryBasicBlock *BB : Clusters[I]) { errs() << Sep << BB->getName(); Sep = ", "; } @@ -122,7 +122,7 @@ return A.Src == B.Src && A.Dst == B.Dst; } -void GreedyClusterAlgorithm::clusterBasicBlocks(const BinaryFunction &BF, +void GreedyClusterAlgorithm::clusterBasicBlocks(BinaryFunction &BF, bool ComputeEdges) { reset(); @@ -148,7 +148,7 @@ BBToClusterMap[BB] = I; // Populate priority queue with edges. auto BI = BB->branch_info_begin(); - for (BinaryBasicBlock *&I : BB->successors()) { + for (const BinaryBasicBlock *I : BB->successors()) { assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE && "attempted reordering blocks of function with no profile data"); Queue.emplace_back(EdgeTy(BB, I, BI->Count)); @@ -191,7 +191,7 @@ if (areClustersCompatible(ClusterA, ClusterB, E)) { // Case 3: SrcBB is at the end of a cluster and DstBB is at the start, // allowing us to merge two clusters. - for (BinaryBasicBlock *BB : ClusterB) + for (const BinaryBasicBlock *BB : ClusterB) BBToClusterMap[BB] = I; ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end()); ClusterB.clear(); @@ -398,7 +398,7 @@ Weight.clear(); } -void TSPReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF, +void TSPReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const { std::vector> Weight; std::vector IndexToBB; @@ -413,7 +413,7 @@ IndexToBB.push_back(BB); } Weight.resize(N); - for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { + for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) { auto BI = BB->branch_info_begin(); Weight[BB->getLayoutIndex()].resize(N); for (BinaryBasicBlock *SuccBB : BB->successors()) { @@ -501,7 +501,7 @@ } void OptimizeReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; @@ -517,7 +517,7 @@ } void OptimizeBranchReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; @@ -622,7 +622,7 @@ } void OptimizeCacheReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; @@ -675,7 +675,7 @@ } } -void ReverseReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF, +void ReverseReorderAlgorithm::reorderBasicBlocks(BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; @@ -687,7 +687,7 @@ } void RandomClusterReorderAlgorithm::reorderBasicBlocks( - const BinaryFunction &BF, BasicBlockOrder &Order) const { + BinaryFunction &BF, BasicBlockOrder &Order) const { if (BF.getLayout().block_empty()) return; diff --git a/bolt/lib/Passes/StokeInfo.cpp b/bolt/lib/Passes/StokeInfo.cpp --- a/bolt/lib/Passes/StokeInfo.cpp +++ b/bolt/lib/Passes/StokeInfo.cpp @@ -46,11 +46,11 @@ void StokeInfo::checkInstr(const BinaryFunction &BF, StokeFuncInfo &FuncInfo) { MCPlusBuilder *MIB = BF.getBinaryContext().MIB.get(); BitVector RegV(NumRegs, false); - for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { + for (const BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->empty()) continue; - for (MCInst &It : *BB) { + for (const MCInst &It : *BB) { if (MIB->isPseudo(It)) continue; // skip function with exception handling yet diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -865,8 +865,8 @@ if (From > To) return false; - BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); - BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); + const BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); + const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); if (!FromBB || !ToBB) return false; @@ -875,7 +875,8 @@ // the previous block (that instruction should be a call). if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { - BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1); + const BinaryBasicBlock *PrevBB = + BF.getLayout().getBlock(FromBB->getIndex() - 1); if (PrevBB->getSuccessor(FromBB->getLabel())) { const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); if (Instr && BC.MIB->isCall(*Instr)) diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -674,7 +674,7 @@ BinaryContext &BC = BF.getBinaryContext(); BinaryBasicBlock *FromBB = BF.getBasicBlockContainingOffset(From); - BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); + const BinaryBasicBlock *ToBB = BF.getBasicBlockContainingOffset(To); if (!FromBB || !ToBB) { LLVM_DEBUG(dbgs() << "failed to get block for recorded branch\n"); @@ -703,7 +703,7 @@ if (!OffsetMatches) { // Skip the nops to support old .fdata uint64_t Offset = ToBB->getOffset(); - for (MCInst &Instr : *ToBB) { + for (const MCInst &Instr : *ToBB) { if (!BC.MIB->isNoop(Instr)) break; @@ -739,7 +739,8 @@ // The real destination is the layout successor of the detected ToBB. if (ToBB == BF.getLayout().block_back()) return false; - BinaryBasicBlock *NextBB = BF.getLayout().getBlock(ToBB->getIndex() + 1); + const BinaryBasicBlock *NextBB = + BF.getLayout().getBlock(ToBB->getIndex() + 1); assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout"); ToBB = NextBB; } diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -183,7 +183,7 @@ RI1.getTotalScore(); } - double getNormalizedScore(BinaryBasicBlock::branch_info_iterator BIIter, + double getNormalizedScore(BinaryBasicBlock::const_branch_info_iterator BIIter, const RewriteInstance &Ctx) { double Score = BIIter->Count == BinaryBasicBlock::COUNT_NO_PROFILE ? 0 : BIIter->Count;