diff --git a/bolt/include/bolt/Core/BinaryBasicBlock.h b/bolt/include/bolt/Core/BinaryBasicBlock.h --- a/bolt/include/bolt/Core/BinaryBasicBlock.h +++ b/bolt/include/bolt/Core/BinaryBasicBlock.h @@ -671,6 +671,8 @@ void markValid(const bool Valid) { IsValid = Valid; } + unsigned getFragmentNum() const { return IsCold ? 1 : 0; } + bool isCold() const { return IsCold; } void setIsCold(const bool Flag) { IsCold = Flag; } diff --git a/bolt/include/bolt/Core/BinaryFunction.h b/bolt/include/bolt/Core/BinaryFunction.h --- a/bolt/include/bolt/Core/BinaryFunction.h +++ b/bolt/include/bolt/Core/BinaryFunction.h @@ -30,6 +30,7 @@ #include "bolt/Core/BinaryLoop.h" #include "bolt/Core/BinarySection.h" #include "bolt/Core/DebugData.h" +#include "bolt/Core/FunctionLayout.h" #include "bolt/Core/JumpTable.h" #include "bolt/Core/MCPlus.h" #include "bolt/Utils/NameResolver.h" @@ -556,10 +557,8 @@ using BasicBlockListType = SmallVector; BasicBlockListType BasicBlocks; BasicBlockListType DeletedBasicBlocks; - BasicBlockOrderType BasicBlocksLayout; - /// Previous layout replaced by modifyLayout - BasicBlockOrderType BasicBlocksPreviousLayout; - bool ModifiedLayout{false}; + + FunctionLayout Layout; /// BasicBlockOffsets are used during CFG construction to map from code /// offsets to BinaryBasicBlocks. Any modifications made to the CFG @@ -758,12 +757,6 @@ using const_reverse_iterator = pointee_iterator; - typedef BasicBlockOrderType::iterator order_iterator; - typedef BasicBlockOrderType::const_iterator const_order_iterator; - typedef BasicBlockOrderType::reverse_iterator reverse_order_iterator; - typedef BasicBlockOrderType::const_reverse_iterator - const_reverse_order_iterator; - // CFG iterators. iterator begin() { return BasicBlocks.begin(); } const_iterator begin() const { return BasicBlocks.begin(); } @@ -792,49 +785,6 @@ BasicBlockListType::iterator pbegin() { return BasicBlocks.begin(); } BasicBlockListType::iterator pend() { return BasicBlocks.end(); } - order_iterator layout_begin() { return BasicBlocksLayout.begin(); } - const_order_iterator layout_begin() const - { return BasicBlocksLayout.begin(); } - order_iterator layout_end() { return BasicBlocksLayout.end(); } - const_order_iterator layout_end() const - { return BasicBlocksLayout.end(); } - reverse_order_iterator layout_rbegin() - { return BasicBlocksLayout.rbegin(); } - const_reverse_order_iterator layout_rbegin() const - { return BasicBlocksLayout.rbegin(); } - reverse_order_iterator layout_rend() - { return BasicBlocksLayout.rend(); } - const_reverse_order_iterator layout_rend() const - { return BasicBlocksLayout.rend(); } - size_t layout_size() const { return BasicBlocksLayout.size(); } - bool layout_empty() const { return BasicBlocksLayout.empty(); } - const BinaryBasicBlock *layout_front() const - { return BasicBlocksLayout.front(); } - BinaryBasicBlock *layout_front() { return BasicBlocksLayout.front(); } - const BinaryBasicBlock *layout_back() const - { return BasicBlocksLayout.back(); } - BinaryBasicBlock *layout_back() { return BasicBlocksLayout.back(); } - - inline iterator_range layout() { - return iterator_range(BasicBlocksLayout.begin(), - BasicBlocksLayout.end()); - } - - inline iterator_range layout() const { - return iterator_range(BasicBlocksLayout.begin(), - BasicBlocksLayout.end()); - } - - inline iterator_range rlayout() { - return iterator_range(BasicBlocksLayout.rbegin(), - BasicBlocksLayout.rend()); - } - - inline iterator_range rlayout() const { - return iterator_range( - BasicBlocksLayout.rbegin(), BasicBlocksLayout.rend()); - } - cfi_iterator cie_begin() { return CIEFrameInstructions.begin(); } const_cfi_iterator cie_begin() const { return CIEFrameInstructions.begin(); } cfi_iterator cie_end() { return CIEFrameInstructions.end(); } @@ -885,27 +835,16 @@ return *this; } - /// Update layout of basic blocks used for output. - void updateBasicBlockLayout(BasicBlockOrderType &NewLayout) { - assert(NewLayout.size() == BasicBlocks.size() && "Layout size mismatch."); + inline FunctionLayout &getLayout() { return Layout; } - BasicBlocksPreviousLayout = BasicBlocksLayout; - if (NewLayout != BasicBlocksLayout) { - ModifiedLayout = true; - BasicBlocksLayout.clear(); - BasicBlocksLayout.swap(NewLayout); - } - } + inline const FunctionLayout &getLayout() const { return Layout; } /// Recompute landing pad information for the function and all its blocks. void recomputeLandingPads(); - /// Return current basic block layout. - const BasicBlockOrderType &getLayout() const { return BasicBlocksLayout; } - /// Return a list of basic blocks sorted using DFS and update layout indices /// using the same order. Does not modify the current layout. - BasicBlockOrderType dfs() const; + BasicBlockListType dfs() const; /// Find the loops in the CFG of the function and store information about /// them. @@ -962,26 +901,6 @@ return I == LabelToBB.end() ? nullptr : I->second; } - /// Returns the basic block after the given basic block in the layout or - /// nullptr the last basic block is given. - const BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB, - bool IgnoreSplits = true) const { - return const_cast(this)->getBasicBlockAfter(BB, - IgnoreSplits); - } - - BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB, - bool IgnoreSplits = true) { - for (auto I = layout_begin(), E = layout_end(); I != E; ++I) { - auto Next = std::next(I); - if (*I == BB && Next != E) { - return (IgnoreSplits || (*I)->isCold() == (*Next)->isCold()) ? *Next - : nullptr; - } - } - return nullptr; - } - /// Retrieve the landing pad BB associated with invoke instruction \p Invoke /// that is in \p BB. Return nullptr if none exists BinaryBasicBlock *getLandingPadBBFor(const BinaryBasicBlock &BB, @@ -1457,8 +1376,9 @@ /// Return true if the function body is non-contiguous. bool isSplit() const { - return isSimple() && layout_size() && - layout_front()->isCold() != layout_back()->isCold(); + return isSimple() && !getLayout().block_empty() && + getLayout().block_front()->isCold() != + getLayout().block_back()->isCold(); } bool shouldPreserveNops() const { return PreserveNops; } @@ -1579,8 +1499,7 @@ BinaryBasicBlock *BB = BasicBlocks.back(); BB->setIndex(BasicBlocks.size() - 1); - BB->setLayoutIndex(layout_size()); - BasicBlocksLayout.emplace_back(BB); + Layout.addBasicBlock(BB); return BB; } @@ -1627,13 +1546,6 @@ /// layout after the BB indicated by Start. void updateLayout(BinaryBasicBlock *Start, const unsigned NumNewBlocks); - /// Make sure basic blocks' indices match the current layout. - void updateLayoutIndices() const { - unsigned Index = 0; - for (BinaryBasicBlock *BB : layout()) - BB->setLayoutIndex(Index++); - } - /// Recompute the CFI state for NumNewBlocks following Start after inserting /// new blocks into the CFG. This must be called after updateLayout. void updateCFIState(BinaryBasicBlock *Start, const unsigned NumNewBlocks); @@ -2212,14 +2124,6 @@ /// and size. uint64_t getFunctionScore() const; - /// Return true if the layout has been changed by basic block reordering, - /// false otherwise. - bool hasLayoutChanged() const; - - /// Get the edit distance of the new layout with respect to the previous - /// layout after basic block reordering. - uint64_t getEditDistance() const; - /// Get the number of instructions within this function. uint64_t getInstructionCount() const; @@ -2428,7 +2332,7 @@ struct GraphTraits : public GraphTraits { static NodeRef getEntryNode(bolt::BinaryFunction *F) { - return *F->layout_begin(); + return F->getLayout().front().front(); } using nodes_iterator = pointer_iterator; @@ -2448,7 +2352,7 @@ struct GraphTraits : public GraphTraits { static NodeRef getEntryNode(const bolt::BinaryFunction *F) { - return *F->layout_begin(); + return F->getLayout().front().front(); } using nodes_iterator = pointer_iterator; @@ -2468,7 +2372,7 @@ struct GraphTraits> : public GraphTraits> { static NodeRef getEntryNode(Inverse G) { - return *G.Graph->layout_begin(); + return G.Graph->getLayout().front().front(); } }; @@ -2476,7 +2380,7 @@ struct GraphTraits> : public GraphTraits> { static NodeRef getEntryNode(Inverse G) { - return *G.Graph->layout_begin(); + return G.Graph->getLayout().front().front(); } }; diff --git a/bolt/include/bolt/Core/FunctionLayout.h b/bolt/include/bolt/Core/FunctionLayout.h new file mode 100644 --- /dev/null +++ b/bolt/include/bolt/Core/FunctionLayout.h @@ -0,0 +1,200 @@ +//===- bolt/Core/FunctionLayout.h - Fragmented Function Layout --*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains the declaration of the FunctionLayout class. The layout of +// a function is the order of basic blocks, in which we will arrange them in the +// new binary. Although the blocks of a function are contiguous, we can split +// the layout into multiple fragments. The blocks within a fragment are +// contiguous, but the fragments itself are disjoint. This is used to separate +// the blocks into hot and cold sections. +// +//===----------------------------------------------------------------------===// + +#ifndef BOLT_CORE_FUNCTION_LAYOUT_H +#define BOLT_CORE_FUNCTION_LAYOUT_H + +#include "bolt/Core/BinaryBasicBlock.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator.h" + +namespace llvm { +namespace bolt { + +class BinaryFunction; + +/// The function layout represents the fragments we split a function into and +/// the order of basic blocks within each fragment. +/// +/// Interally, the function layout stores blocks across fragments contiguous. +/// This is neccessary to retain compatilibity with existing code and tests that +/// iterate over all blocks of the layout and depend on that order. When +/// writing new code, avoid iterating using FunctionLayout::blocks() by +/// iterating either over fragments or over BinaryFunction::begin()..end(). +class FunctionLayout { +public: + /// Named fragment when hot/cold splitting is used. + enum NamedFragment : unsigned { NF_HOT = 0, NF_COLD = 1 }; + +private: + using BasicBlockListType = SmallVector; + using block_iterator = BasicBlockListType::iterator; + using FragmentListType = SmallVector; + +public: + class FragmentIterator; + + /// A freestanding subset of contiguous blocks of a function. + class Fragment { + public: + using const_iterator = BasicBlockListType::const_iterator; + + private: + FragmentListType::const_iterator FragmentIt; + const_iterator Begin; + const_iterator End; + + Fragment(FragmentListType::const_iterator FragmentIt, const_iterator Begin, + const_iterator End) + : FragmentIt(FragmentIt), Begin(Begin), End(End) {} + + public: + unsigned size() const { return End - Begin; } + const_iterator begin() const { return Begin; } + const_iterator end() const { return End; } + BinaryBasicBlock *front() const { return *Begin; } + + friend class FunctionLayout; + friend class FragmentIterator; + }; + + class FragmentIterator + : public iterator_facade_base { + FragmentListType::const_iterator It; + BasicBlockListType::const_iterator BlockBegin; + + FragmentIterator(FragmentListType::const_iterator It, + BasicBlockListType::const_iterator BlockBegin) + : It(It), BlockBegin(BlockBegin) {} + + public: + bool operator==(const FragmentIterator &Other) const { + return It == Other.It; + } + + Fragment operator*() const { + return Fragment(It, BlockBegin + *It, BlockBegin + *std::next(It)); + } + + FragmentIterator &operator++() { + ++It; + return *this; + } + + FragmentIterator &operator--() { + --It; + return *this; + } + + friend class FunctionLayout; + }; + + using const_iterator = FragmentIterator; + using block_const_iterator = BasicBlockListType::const_iterator; + using block_const_reverse_iterator = + BasicBlockListType::const_reverse_iterator; + +private: + BasicBlockListType Blocks; + FragmentListType Fragments = {0}; + BasicBlockListType PreviousBlocks; + FragmentListType PreviousFragments; + +public: + /// Add an empty fragment. + Fragment addFragment(); + + /// Return the fragment identified by Number. + Fragment getFragment(unsigned Number) const; + + /// Find the fragment that contains BB. + Fragment findFragment(const BinaryBasicBlock *BB) const; + + /// Adds BB to the end of the last fragment. + void addBasicBlock(BinaryBasicBlock *BB); + + /// Insert range of basic blocks after InsertAfter. If InsertAfter is nullptr, + /// the blocks will be inserted at the start of the function. + void insertBasicBlocks(BinaryBasicBlock *InsertAfter, + ArrayRef NewBlocks); + + /// Erase all blocks from the layout that are in ToErase. + void eraseBasicBlocks(const DenseSet ToErase); + + /// Make sure fragments' and basic blocks' indices match the current layout. + void updateLayoutIndices() const; + + /// Replace the current layout with NewLayout. Uses the block's + /// self-identifying fragment number to assign blocks to infer function + /// fragments. + void update(const ArrayRef NewLayout); + + /// Return true if the layout has been changed by basic block reordering, + /// false otherwise. + bool hasLayoutChanged() const { return !PreviousBlocks.empty(); } + + /// Clear layout releasing memory. + void clear(); + + BinaryBasicBlock *getBlock(unsigned Index) const { return Blocks[Index]; } + + /// Returns the basic block after the given basic block in the layout or + /// nullptr the last basic block is given. + BinaryBasicBlock *getBasicBlockAfter(const BinaryBasicBlock *BB, + bool IgnoreSplits = true) const; + + /// Get the edit distance of the new layout with respect to the previous + /// layout after basic block reordering. + uint64_t getEditDistance() const; + + size_t size() const { return Fragments.size() - 1; } + bool empty() const { return Fragments.size() == 1; } + const_iterator begin() const { return {Fragments.begin(), Blocks.begin()}; } + const_iterator end() const { + return {std::prev(Fragments.end()), Blocks.begin()}; + } + Fragment front() const { return *begin(); } + Fragment back() const { return *std::prev(end()); } + Fragment operator[](unsigned Number) const { return getFragment(Number); } + + size_t block_size() const { return Blocks.size(); } + bool block_empty() const { return Blocks.empty(); } + BinaryBasicBlock *block_front() const { return Blocks.front(); } + BinaryBasicBlock *block_back() const { return Blocks.back(); } + block_const_iterator block_begin() const { return Blocks.begin(); } + block_const_iterator block_end() const { return Blocks.end(); } + iterator_range blocks() const { + return {block_begin(), block_end()}; + } + block_const_reverse_iterator block_rbegin() const { return Blocks.rbegin(); } + block_const_reverse_iterator block_rend() const { return Blocks.rend(); } + iterator_range rblocks() const { + return {block_rbegin(), block_rend()}; + } + +private: + block_const_iterator findBasicBlockPos(const BinaryBasicBlock *BB) const; + block_iterator findBasicBlockPos(const BinaryBasicBlock *BB); +}; + +} // namespace bolt +} // namespace llvm + +#endif diff --git a/bolt/lib/Core/BinaryEmitter.cpp b/bolt/lib/Core/BinaryEmitter.cpp --- a/bolt/lib/Core/BinaryEmitter.cpp +++ b/bolt/lib/Core/BinaryEmitter.cpp @@ -398,7 +398,7 @@ // Track the first emitted instruction with debug info. bool FirstInstr = true; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (EmitColdPart != BB->isCold()) continue; diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -22,7 +22,6 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" -#include "llvm/ADT/edit_distance.h" #include "llvm/Demangle/Demangle.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAsmLayout.h" @@ -331,34 +330,34 @@ // Any unnecessary fallthrough jumps revealed after calling eraseInvalidBBs // will be cleaned up by fixBranches(). std::pair BinaryFunction::eraseInvalidBBs() { - BasicBlockOrderType NewLayout; + DenseSet InvalidBBs; unsigned Count = 0; uint64_t Bytes = 0; - for (BinaryBasicBlock *BB : layout()) { - if (BB->isValid()) { - NewLayout.push_back(BB); - } else { + for (BinaryBasicBlock *const BB : BasicBlocks) { + if (!BB->isValid()) { assert(!isEntryPoint(*BB) && "all entry blocks must be valid"); + InvalidBBs.insert(BB); ++Count; Bytes += BC.computeCodeSize(BB->begin(), BB->end()); } } - BasicBlocksLayout = std::move(NewLayout); + + Layout.eraseBasicBlocks(InvalidBBs); BasicBlockListType NewBasicBlocks; for (auto I = BasicBlocks.begin(), E = BasicBlocks.end(); I != E; ++I) { BinaryBasicBlock *BB = *I; - if (BB->isValid()) { - NewBasicBlocks.push_back(BB); - } else { + if (InvalidBBs.contains(BB)) { // Make sure the block is removed from the list of predecessors. BB->removeAllSuccessors(); DeletedBasicBlocks.push_back(BB); + } else { + NewBasicBlocks.push_back(BB); } } BasicBlocks = std::move(NewBasicBlocks); - assert(BasicBlocks.size() == BasicBlocksLayout.size()); + assert(BasicBlocks.size() == Layout.block_size()); // Update CFG state if needed if (Count > 0) @@ -458,10 +457,10 @@ } if (FrameInstructions.size()) OS << "\n CFI Instrs : " << FrameInstructions.size(); - if (BasicBlocksLayout.size()) { + if (!Layout.block_empty()) { OS << "\n BB Layout : "; ListSeparator LS; - for (BinaryBasicBlock *BB : BasicBlocksLayout) + for (const BinaryBasicBlock *BB : Layout.blocks()) OS << LS << BB->getName(); } if (ImageAddress) @@ -471,7 +470,7 @@ OS << "\n Profile Acc : " << format("%.1f%%", ProfileMatchRatio * 100.0f); } - if (opts::PrintDynoStats && !BasicBlocksLayout.empty()) { + if (opts::PrintDynoStats && !getLayout().block_empty()) { OS << '\n'; DynoStats dynoStats = getDynoStats(*this); OS << dynoStats; @@ -503,105 +502,108 @@ } } - for (uint32_t I = 0, E = BasicBlocksLayout.size(); I != E; ++I) { - BinaryBasicBlock *BB = BasicBlocksLayout[I]; - if (I != 0 && BB->isCold() != BasicBlocksLayout[I - 1]->isCold()) - OS << "------- HOT-COLD SPLIT POINT -------\n\n"; - - OS << BB->getName() << " (" << BB->size() - << " instructions, align : " << BB->getAlignment() << ")\n"; - - if (isEntryPoint(*BB)) { - if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB)) - OS << " Secondary Entry Point: " << EntrySymbol->getName() << '\n'; - else - OS << " Entry Point\n"; - } - - if (BB->isLandingPad()) - OS << " Landing Pad\n"; - - uint64_t BBExecCount = BB->getExecutionCount(); - if (hasValidProfile()) { - OS << " Exec Count : "; - if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) - OS << BBExecCount << '\n'; - else - OS << "\n"; - } - if (BB->getCFIState() >= 0) - OS << " CFI State : " << BB->getCFIState() << '\n'; - if (opts::EnableBAT) { - OS << " Input offset: " << Twine::utohexstr(BB->getInputOffset()) - << "\n"; - } - if (!BB->pred_empty()) { - OS << " Predecessors: "; - ListSeparator LS; - for (BinaryBasicBlock *Pred : BB->predecessors()) - OS << LS << Pred->getName(); - OS << '\n'; - } - if (!BB->throw_empty()) { - OS << " Throwers: "; - ListSeparator LS; - for (BinaryBasicBlock *Throw : BB->throwers()) - OS << LS << Throw->getName(); - OS << '\n'; - } - - Offset = alignTo(Offset, BB->getAlignment()); + StringRef SplitPointMsg = ""; + for (const FunctionLayout::Fragment F : Layout) { + OS << SplitPointMsg; + SplitPointMsg = "------- HOT-COLD SPLIT POINT -------\n\n"; + for (const BinaryBasicBlock *BB : F) { + OS << BB->getName() << " (" << BB->size() + << " instructions, align : " << BB->getAlignment() << ")\n"; + + if (isEntryPoint(*BB)) { + if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB)) + OS << " Secondary Entry Point: " << EntrySymbol->getName() << '\n'; + else + OS << " Entry Point\n"; + } - // Note: offsets are imprecise since this is happening prior to relaxation. - Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this); + if (BB->isLandingPad()) + OS << " Landing Pad\n"; - if (!BB->succ_empty()) { - OS << " Successors: "; - // For more than 2 successors, sort them based on frequency. - std::vector Indices(BB->succ_size()); - std::iota(Indices.begin(), Indices.end(), 0); - if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) { - llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) { - return BB->BranchInfo[B] < BB->BranchInfo[A]; - }); + uint64_t BBExecCount = BB->getExecutionCount(); + if (hasValidProfile()) { + OS << " Exec Count : "; + if (BB->getExecutionCount() != BinaryBasicBlock::COUNT_NO_PROFILE) + OS << BBExecCount << '\n'; + else + OS << "\n"; + } + if (BB->getCFIState() >= 0) + OS << " CFI State : " << BB->getCFIState() << '\n'; + if (opts::EnableBAT) { + OS << " Input offset: " << Twine::utohexstr(BB->getInputOffset()) + << "\n"; } - ListSeparator LS; - for (unsigned I = 0; I < Indices.size(); ++I) { - BinaryBasicBlock *Succ = BB->Successors[Indices[I]]; - BinaryBasicBlock::BinaryBranchInfo &BI = BB->BranchInfo[Indices[I]]; - OS << LS << Succ->getName(); - if (ExecutionCount != COUNT_NO_PROFILE && - BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { - OS << " (mispreds: " << BI.MispredictedCount - << ", count: " << BI.Count << ")"; - } else if (ExecutionCount != COUNT_NO_PROFILE && - BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) { - OS << " (inferred count: " << BI.Count << ")"; + if (!BB->pred_empty()) { + OS << " Predecessors: "; + ListSeparator LS; + for (BinaryBasicBlock *Pred : BB->predecessors()) + OS << LS << Pred->getName(); + OS << '\n'; + } + if (!BB->throw_empty()) { + OS << " Throwers: "; + ListSeparator LS; + for (BinaryBasicBlock *Throw : BB->throwers()) + OS << LS << Throw->getName(); + OS << '\n'; + } + + Offset = alignTo(Offset, BB->getAlignment()); + + // Note: offsets are imprecise since this is happening prior to + // relaxation. + Offset = BC.printInstructions(OS, BB->begin(), BB->end(), Offset, this); + + if (!BB->succ_empty()) { + OS << " Successors: "; + // For more than 2 successors, sort them based on frequency. + std::vector Indices(BB->succ_size()); + std::iota(Indices.begin(), Indices.end(), 0); + if (BB->succ_size() > 2 && BB->getKnownExecutionCount()) { + llvm::stable_sort(Indices, [&](const uint64_t A, const uint64_t B) { + return BB->BranchInfo[B] < BB->BranchInfo[A]; + }); } + ListSeparator LS; + for (unsigned I = 0; I < Indices.size(); ++I) { + BinaryBasicBlock *Succ = BB->Successors[Indices[I]]; + const BinaryBasicBlock::BinaryBranchInfo &BI = + BB->BranchInfo[Indices[I]]; + OS << LS << Succ->getName(); + if (ExecutionCount != COUNT_NO_PROFILE && + BI.MispredictedCount != BinaryBasicBlock::COUNT_INFERRED) { + OS << " (mispreds: " << BI.MispredictedCount + << ", count: " << BI.Count << ")"; + } else if (ExecutionCount != COUNT_NO_PROFILE && + BI.Count != BinaryBasicBlock::COUNT_NO_PROFILE) { + OS << " (inferred count: " << BI.Count << ")"; + } + } + OS << '\n'; } - OS << '\n'; - } - if (!BB->lp_empty()) { - OS << " Landing Pads: "; - ListSeparator LS; - for (BinaryBasicBlock *LP : BB->landing_pads()) { - OS << LS << LP->getName(); - if (ExecutionCount != COUNT_NO_PROFILE) { - OS << " (count: " << LP->getExecutionCount() << ")"; + if (!BB->lp_empty()) { + OS << " Landing Pads: "; + ListSeparator LS; + for (BinaryBasicBlock *LP : BB->landing_pads()) { + OS << LS << LP->getName(); + if (ExecutionCount != COUNT_NO_PROFILE) { + OS << " (count: " << LP->getExecutionCount() << ")"; + } } + OS << '\n'; } - OS << '\n'; - } - // In CFG_Finalized state we can miscalculate CFI state at exit. - if (CurrentState == State::CFG) { - const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); - if (CFIStateAtExit >= 0) - OS << " CFI State: " << CFIStateAtExit << '\n'; - } + // In CFG_Finalized state we can miscalculate CFI state at exit. + if (CurrentState == State::CFG) { + const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); + if (CFIStateAtExit >= 0) + OS << " CFI State: " << CFIStateAtExit << '\n'; + } - OS << '\n'; + OS << '\n'; + } } // Dump new exception ranges for the function. @@ -2107,14 +2109,14 @@ // Set the basic block layout to the original order and set end offsets. PrevBB = nullptr; for (BinaryBasicBlock *BB : BasicBlocks) { - BasicBlocksLayout.emplace_back(BB); + Layout.addBasicBlock(BB); if (PrevBB) PrevBB->setEndOffset(BB->getOffset()); PrevBB = BB; } PrevBB->setEndOffset(getSize()); - updateLayoutIndices(); + Layout.updateLayoutIndices(); normalizeCFIState(); @@ -2744,7 +2746,7 @@ // equivalent unwindCFIState sequence required at that point to achieve the // same effect of the restore. All remember state are then just ignored. std::stack Stack; - for (BinaryBasicBlock *CurBB : BasicBlocksLayout) { + for (BinaryBasicBlock *CurBB : Layout.blocks()) { for (auto II = CurBB->begin(); II != CurBB->end(); ++II) { if (const MCCFIInstruction *CFI = getCFIFor(*II)) { if (CFI->getOperation() == MCCFIInstruction::OpRememberState) { @@ -2769,39 +2771,36 @@ LLVM_DEBUG(dbgs() << "This is the list of CFI states for each BB of " << *this << ": "); - int32_t State = 0; - bool SeenCold = false; const char *Sep = ""; (void)Sep; - for (BinaryBasicBlock *BB : BasicBlocksLayout) { - const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); - - // Hot-cold border: check if this is the first BB to be allocated in a cold - // region (with a different FDE). If yes, we need to reset the CFI state. - if (!SeenCold && BB->isCold()) { - State = 0; - SeenCold = true; - } + for (const FunctionLayout::Fragment F : Layout) { + // Hot-cold border: at start of each region (with a different FDE) we need + // to reset the CFI state. + int32_t State = 0; - // We need to recover the correct state if it doesn't match expected - // state at BB entry point. - if (BB->getCFIState() < State) { - // In this case, State is currently higher than what this BB expect it - // to be. To solve this, we need to insert CFI instructions to undo - // the effect of all CFI from BB's state to current State. - auto InsertIt = BB->begin(); - unwindCFIState(State, BB->getCFIState(), BB, InsertIt); - } else if (BB->getCFIState() > State) { - // If BB's CFI state is greater than State, it means we are behind in the - // state. Just emit all instructions to reach this state at the - // beginning of this BB. If this sequence of instructions involve - // remember state or restore state, bail out. - if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin())) - return false; - } + for (BinaryBasicBlock *BB : F) { + const int32_t CFIStateAtExit = BB->getCFIStateAtExit(); - State = CFIStateAtExit; - LLVM_DEBUG(dbgs() << Sep << State; Sep = ", "); + // We need to recover the correct state if it doesn't match expected + // state at BB entry point. + if (BB->getCFIState() < State) { + // In this case, State is currently higher than what this BB expect it + // to be. To solve this, we need to insert CFI instructions to undo + // the effect of all CFI from BB's state to current State. + auto InsertIt = BB->begin(); + unwindCFIState(State, BB->getCFIState(), BB, InsertIt); + } else if (BB->getCFIState() > State) { + // If BB's CFI state is greater than State, it means we are behind in + // the state. Just emit all instructions to reach this state at the + // beginning of this BB. If this sequence of instructions involve + // remember state or restore state, bail out. + if (!replayCFIInstrs(State, BB->getCFIState(), BB, BB->begin())) + return false; + } + + State = CFIStateAtExit; + LLVM_DEBUG(dbgs() << Sep << State; Sep = ", "); + } } LLVM_DEBUG(dbgs() << "\n"); @@ -2831,13 +2830,6 @@ return Count; } -bool BinaryFunction::hasLayoutChanged() const { return ModifiedLayout; } - -uint64_t BinaryFunction::getEditDistance() const { - return ComputeEditDistance(BasicBlocksPreviousLayout, - BasicBlocksLayout); -} - void BinaryFunction::clearDisasmState() { clearList(Instructions); clearList(IgnoredBranches); @@ -2894,8 +2886,7 @@ delete BB; clearList(DeletedBasicBlocks); - clearList(BasicBlocksLayout); - clearList(BasicBlocksPreviousLayout); + Layout.clear(); } CurrentState = State::Empty; @@ -2908,7 +2899,7 @@ void BinaryFunction::duplicateConstantIslands() { assert(Islands && "function expected to have constant islands"); - for (BinaryBasicBlock *BB : layout()) { + for (BinaryBasicBlock *BB : getLayout().blocks()) { if (!BB->isCold()) continue; @@ -3002,8 +2993,8 @@ << "node [fontname=courier, shape=box, style=filled, colorscheme=brbg9]\n"; uint64_t Offset = Address; for (BinaryBasicBlock *BB : BasicBlocks) { - auto LayoutPos = llvm::find(BasicBlocksLayout, BB); - unsigned Layout = LayoutPos - BasicBlocksLayout.begin(); + auto LayoutPos = find(Layout.blocks(), BB); + unsigned LayoutIndex = LayoutPos - Layout.block_begin(); const char *ColdStr = BB->isCold() ? " (cold)" : ""; std::vector Attrs; // Bold box for entry points @@ -3027,7 +3018,7 @@ OS << format("\"%s\" [label=\"%s%s\\n(C:%lu,O:%lu,I:%u,L:%u,CFI:%u)\\n", BB->getName().data(), BB->getName().data(), ColdStr, BB->getKnownExecutionCount(), BB->getOffset(), getIndex(BB), - Layout, BB->getCFIState()); + LayoutIndex, BB->getCFIState()); if (opts::DotToolTipCode) { std::string Str; @@ -3213,8 +3204,7 @@ auto &MIB = BC.MIB; MCContext *Ctx = BC.Ctx.get(); - for (unsigned I = 0, E = BasicBlocksLayout.size(); I != E; ++I) { - BinaryBasicBlock *BB = BasicBlocksLayout[I]; + for (BinaryBasicBlock *BB : BasicBlocks) { const MCSymbol *TBB = nullptr; const MCSymbol *FBB = nullptr; MCInst *CondBranch = nullptr; @@ -3227,9 +3217,7 @@ BB->eraseInstruction(BB->findInstruction(UncondBranch)); // Basic block that follows the current one in the final layout. - const BinaryBasicBlock *NextBB = nullptr; - if (I + 1 != E && BB->isCold() == BasicBlocksLayout[I + 1]->isCold()) - NextBB = BasicBlocksLayout[I + 1]; + const BinaryBasicBlock *NextBB = Layout.getBasicBlockAfter(BB, false); if (BB->succ_size() == 1) { // __builtin_unreachable() could create a conditional branch that @@ -3560,7 +3548,11 @@ assert(hasCFG() && "function is expected to have CFG"); - const BasicBlockOrderType &Order = UseDFS ? dfs() : BasicBlocksLayout; + BasicBlockListType Order; + if (UseDFS) + Order = dfs(); + else + copy(Layout.blocks(), std::back_inserter(Order)); // The hash is computed by creating a string of all instruction opcodes and // possibly their operands and then hashing that string with std::hash. @@ -3671,23 +3663,22 @@ void BinaryFunction::updateLayout(BinaryBasicBlock *Start, const unsigned NumNewBlocks) { - // If start not provided insert new blocks at the beginning + BasicBlockListType::iterator Begin; + BasicBlockListType::iterator End; + + // If start not provided copy new blocks from the beginning of BasicBlocks if (!Start) { - BasicBlocksLayout.insert(layout_begin(), BasicBlocks.begin(), - BasicBlocks.begin() + NumNewBlocks); - updateLayoutIndices(); - return; + Begin = BasicBlocks.begin(); + End = BasicBlocks.begin() + NumNewBlocks; + } else { + unsigned StartIndex = getIndex(Start); + Begin = std::next(BasicBlocks.begin(), StartIndex + 1); + End = std::next(BasicBlocks.begin(), StartIndex + NumNewBlocks + 1); } // Insert new blocks in the layout immediately after Start. - auto Pos = llvm::find(layout(), Start); - assert(Pos != layout_end()); - BasicBlockListType::iterator Begin = - std::next(BasicBlocks.begin(), getIndex(Start) + 1); - BasicBlockListType::iterator End = - std::next(BasicBlocks.begin(), getIndex(Start) + NumNewBlocks + 1); - BasicBlocksLayout.insert(Pos + 1, Begin, End); - updateLayoutIndices(); + Layout.insertBasicBlocks(Start, {Begin, End}); + Layout.updateLayoutIndices(); } bool BinaryFunction::checkForAmbiguousJumpTables() { @@ -4087,12 +4078,11 @@ return; // AArch64 may have functions that only contains a constant island (no code). - if (layout_begin() == layout_end()) + if (getLayout().block_empty()) return; BinaryBasicBlock *PrevBB = nullptr; - for (auto BBI = layout_begin(), BBE = layout_end(); BBI != BBE; ++BBI) { - BinaryBasicBlock *BB = *BBI; + for (BinaryBasicBlock *BB : this->Layout.blocks()) { assert(BB->getLabel()->isDefined() && "symbol should be defined"); const uint64_t BBBaseAddress = BB->isCold() ? ColdBaseAddress : BaseAddress; if (!BC.HasRelocations) { diff --git a/bolt/lib/Core/CMakeLists.txt b/bolt/lib/Core/CMakeLists.txt --- a/bolt/lib/Core/CMakeLists.txt +++ b/bolt/lib/Core/CMakeLists.txt @@ -18,6 +18,7 @@ DebugData.cpp DynoStats.cpp Exceptions.cpp + FunctionLayout.cpp JumpTable.cpp MCPlusBuilder.cpp ParallelUtilities.cpp diff --git a/bolt/lib/Core/DynoStats.cpp b/bolt/lib/Core/DynoStats.cpp --- a/bolt/lib/Core/DynoStats.cpp +++ b/bolt/lib/Core/DynoStats.cpp @@ -169,9 +169,9 @@ // Update enumeration of basic blocks for correct detection of branch' // direction. - BF.updateLayoutIndices(); + BF.getLayout().updateLayoutIndices(); - for (BinaryBasicBlock *const &BB : BF.layout()) { + for (BinaryBasicBlock *const BB : BF.getLayout().blocks()) { // The basic block execution count equals to the sum of incoming branch // frequencies. This may deviate from the sum of outgoing branches of the // basic block especially since the block may contain a function that diff --git a/bolt/lib/Core/Exceptions.cpp b/bolt/lib/Core/Exceptions.cpp --- a/bolt/lib/Core/Exceptions.cpp +++ b/bolt/lib/Core/Exceptions.cpp @@ -357,7 +357,7 @@ // Sites to update - either regular or cold. CallSitesType *Sites = &CallSites; - for (BinaryBasicBlock *&BB : BasicBlocksLayout) { + for (BinaryBasicBlock *BB : getLayout().blocks()) { if (BB->isCold() && !SeenCold) { SeenCold = true; diff --git a/bolt/lib/Core/FunctionLayout.cpp b/bolt/lib/Core/FunctionLayout.cpp new file mode 100644 --- /dev/null +++ b/bolt/lib/Core/FunctionLayout.cpp @@ -0,0 +1,151 @@ +#include "bolt/Core/FunctionLayout.h" +#include "bolt/Core/BinaryFunction.h" +#include "llvm/ADT/edit_distance.h" +#include +#include + +using namespace llvm; +using namespace bolt; + +FunctionLayout::Fragment FunctionLayout::addFragment() { + Fragments.emplace_back(Blocks.size()); + return back(); +} + +FunctionLayout::Fragment FunctionLayout::getFragment(unsigned Number) const { + return Fragment(Fragments.begin() + Number, block_begin() + Fragments[Number], + block_begin() + Fragments[Number + 1]); +} + +FunctionLayout::Fragment +FunctionLayout::findFragment(const BinaryBasicBlock *BB) const { + return getFragment(BB->getFragmentNum()); +} + +void FunctionLayout::addBasicBlock(BinaryBasicBlock *BB) { + if (empty()) + addFragment(); + + BB->setLayoutIndex(Blocks.size()); + Blocks.emplace_back(BB); + ++Fragments.back(); + + assert(Fragments.back() == Blocks.size()); +} + +void FunctionLayout::insertBasicBlocks(BinaryBasicBlock *InsertAfter, + ArrayRef NewBlocks) { + if (empty()) + addFragment(); + + const block_iterator InsertBeforePos = + InsertAfter ? std::next(findBasicBlockPos(InsertAfter)) : Blocks.begin(); + Blocks.insert(InsertBeforePos, NewBlocks.begin(), NewBlocks.end()); + + unsigned FragmentUpdateStart = + InsertAfter ? InsertAfter->getFragmentNum() + 1 : 1; + std::for_each( + Fragments.begin() + FragmentUpdateStart, Fragments.end(), + [&](unsigned &FragmentOffset) { FragmentOffset += NewBlocks.size(); }); +} + +void FunctionLayout::eraseBasicBlocks( + const DenseSet ToErase) { + auto IsErased = [&](const BinaryBasicBlock *const BB) { + return ToErase.contains(BB); + }; + FragmentListType NewFragments; + NewFragments.emplace_back(0); + for (const Fragment F : *this) { + unsigned ErasedBlocks = count_if(F, IsErased); + unsigned NewFragment = NewFragments.back() + F.size() - ErasedBlocks; + NewFragments.emplace_back(NewFragment); + } + Blocks.erase(std::remove_if(Blocks.begin(), Blocks.end(), IsErased), + Blocks.end()); + Fragments = std::move(NewFragments); +} + +void FunctionLayout::updateLayoutIndices() const { + unsigned BlockIndex = 0; + for (const Fragment F : *this) { + for (BinaryBasicBlock *const BB : F) + BB->setLayoutIndex(BlockIndex++); + } +} + +void FunctionLayout::update(const ArrayRef NewLayout) { + PreviousBlocks = std::move(Blocks); + PreviousFragments = std::move(Fragments); + + Blocks.clear(); + Fragments = {0}; + + if (NewLayout.empty()) + return; + + copy(NewLayout, std::back_inserter(Blocks)); + + // Generate fragments + for (const auto &BB : enumerate(Blocks)) { + unsigned FragmentNum = BB.value()->getFragmentNum(); + + assert(FragmentNum + 1 >= size() && + "Blocks must be arranged such that fragments are monotonous " + "increasing."); + + // Add empty fragments if necessary + for (unsigned I = size(); I <= FragmentNum; ++I) { + addFragment(); + Fragments[I] = BB.index(); + } + + // Set the next fragment to point one past the current BB + Fragments[FragmentNum + 1] = BB.index() + 1; + } + + if (PreviousBlocks == Blocks && PreviousFragments == Fragments) { + // If new layout is the same as previous layout, clear previous layout so + // hasLayoutChanged() returns false. + PreviousBlocks = {}; + PreviousFragments = {}; + } +} + +void FunctionLayout::clear() { + Blocks = {}; + Fragments = {0}; + PreviousBlocks = {}; + PreviousFragments = {0}; +} + +BinaryBasicBlock *FunctionLayout::getBasicBlockAfter(const BinaryBasicBlock *BB, + bool IgnoreSplits) const { + const block_const_iterator BBPos = find(Blocks, BB); + if (BBPos == Blocks.end()) + return nullptr; + + const block_const_iterator BlockAfter = std::next(BBPos); + if (BlockAfter == Blocks.end()) + return nullptr; + + if (!IgnoreSplits) + if (BlockAfter == getFragment(BB->getFragmentNum()).end()) + return nullptr; + + return *BlockAfter; +} + +uint64_t FunctionLayout::getEditDistance() const { + return ComputeEditDistance(PreviousBlocks, Blocks); +} + +FunctionLayout::block_const_iterator +FunctionLayout::findBasicBlockPos(const BinaryBasicBlock *BB) const { + return find(Blocks, BB); +} + +FunctionLayout::block_iterator +FunctionLayout::findBasicBlockPos(const BinaryBasicBlock *BB) { + return find(Blocks, BB); +} diff --git a/bolt/lib/Passes/Aligner.cpp b/bolt/lib/Passes/Aligner.cpp --- a/bolt/lib/Passes/Aligner.cpp +++ b/bolt/lib/Passes/Aligner.cpp @@ -113,7 +113,7 @@ const uint64_t FuncCount = std::max(1, Function.getKnownExecutionCount()); BinaryBasicBlock *PrevBB = nullptr; - for (BinaryBasicBlock *BB : Function.layout()) { + for (BinaryBasicBlock *BB : Function.getLayout().blocks()) { uint64_t Count = BB->getKnownExecutionCount(); if (Count <= FuncCount * opts::AlignBlocksThreshold / 100) { diff --git a/bolt/lib/Passes/AsmDump.cpp b/bolt/lib/Passes/AsmDump.cpp --- a/bolt/lib/Passes/AsmDump.cpp +++ b/bolt/lib/Passes/AsmDump.cpp @@ -196,7 +196,7 @@ std::unordered_set CallReferences; MAP->OutStreamer->emitCFIStartProc(/*IsSimple=*/false); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { OS << BB->getName() << ": \n"; const std::string BranchLabel = Twine(BB->getName(), "_br").str(); diff --git a/bolt/lib/Passes/BinaryFunctionCallGraph.cpp b/bolt/lib/Passes/BinaryFunctionCallGraph.cpp --- a/bolt/lib/Passes/BinaryFunctionCallGraph.cpp +++ b/bolt/lib/Passes/BinaryFunctionCallGraph.cpp @@ -215,7 +215,7 @@ ++NotProcessed; } } else { - for (BinaryBasicBlock *BB : Function->layout()) { + for (BinaryBasicBlock *BB : Function->getLayout().blocks()) { // Don't count calls from cold blocks unless requested. if (BB->isCold() && !IncludeColdCalls) continue; diff --git a/bolt/lib/Passes/BinaryPasses.cpp b/bolt/lib/Passes/BinaryPasses.cpp --- a/bolt/lib/Passes/BinaryPasses.cpp +++ b/bolt/lib/Passes/BinaryPasses.cpp @@ -334,7 +334,7 @@ } void EliminateUnreachableBlocks::runOnFunction(BinaryFunction &Function) { - if (Function.layout_size() > 0) { + if (!Function.getLayout().block_empty()) { unsigned Count; uint64_t Bytes; Function.markUnreachableBlocks(); @@ -392,7 +392,7 @@ ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { modifyFunctionLayout(BF, opts::ReorderBlocks, opts::MinBranchClusters); - if (BF.hasLayoutChanged()) + if (BF.getLayout().hasLayoutChanged()) ++ModifiedFuncCount; }; @@ -439,7 +439,7 @@ OS << " There are " << Function.getInstructionCount() << " number of instructions in this function.\n"; OS << " The edit distance for this function is: " - << Function.getEditDistance() << "\n\n"; + << Function.getLayout().getEditDistance() << "\n\n"; } } } @@ -500,7 +500,7 @@ Algo->reorderBasicBlocks(BF, NewLayout); - BF.updateBasicBlockLayout(NewLayout); + BF.getLayout().update(NewLayout); } void FixupBranches::runOnFunctions(BinaryContext &BC) { @@ -581,7 +581,7 @@ // Have we crossed hot/cold border for split functions? bool SeenCold = false; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->isCold() && !SeenCold) { SeenCold = true; CurrentGnuArgsSize = 0; @@ -677,7 +677,7 @@ MIB->getTargetSymbol(*UncondBranch) == BB.getLabel()) { MIB->replaceBranchTarget(*UncondBranch, Succ->getLabel(), Ctx); } else if (!UncondBranch) { - assert(Function.getBasicBlockAfter(Pred, false) != Succ && + assert(Function.getLayout().getBasicBlockAfter(Pred, false) != Succ && "Don't add an explicit jump to a fallthrough block."); Pred->addBranchInstruction(Succ); } @@ -781,7 +781,7 @@ uint64_t SimplifyConditionalTailCalls::fixTailCalls(BinaryFunction &BF) { // Need updated indices to correctly detect branch' direction. - BF.updateLayoutIndices(); + BF.getLayout().updateLayoutIndices(); BF.markUnreachableBlocks(); MCPlusBuilder *MIB = BF.getBinaryContext().MIB.get(); @@ -798,7 +798,7 @@ return (BB->pred_size() != 0 || BB->isLandingPad() || BB->isEntryPoint()); }; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { // Locate BB with a single direct tail-call instruction. if (BB->getNumNonPseudos() != 1) continue; @@ -915,9 +915,10 @@ // Find the next valid block. Invalid blocks will be deleted // so they shouldn't be considered fallthrough targets. - const BinaryBasicBlock *NextBlock = BF.getBasicBlockAfter(PredBB, false); + const BinaryBasicBlock *NextBlock = + BF.getLayout().getBasicBlockAfter(PredBB, false); while (NextBlock && !isValid(NextBlock)) - NextBlock = BF.getBasicBlockAfter(NextBlock, false); + NextBlock = BF.getLayout().getBasicBlockAfter(NextBlock, false); // Get the unconditional successor to this block. const BinaryBasicBlock *PredSucc = PredBB->getSuccessor(); @@ -1106,7 +1107,7 @@ uint64_t NumLocalLoadsFound = 0; uint64_t NumDynamicLocalLoadsFound = 0; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { for (MCInst &Inst : *BB) { unsigned Opcode = Inst.getOpcode(); const MCInstrDesc &Desc = BC.MII->get(Opcode); @@ -1548,7 +1549,7 @@ const uint64_t HotThreshold = std::max(BF.getKnownExecutionCount(), 1); bool HotSeen = false; - for (const BinaryBasicBlock *BB : BF.rlayout()) { + for (const BinaryBasicBlock *BB : BF.getLayout().rblocks()) { if (!HotSeen && BB->getKnownExecutionCount() > HotThreshold) { HotSeen = true; continue; diff --git a/bolt/lib/Passes/CacheMetrics.cpp b/bolt/lib/Passes/CacheMetrics.cpp --- a/bolt/lib/Passes/CacheMetrics.cpp +++ b/bolt/lib/Passes/CacheMetrics.cpp @@ -117,7 +117,7 @@ for (BinaryFunction *SrcFunction : BinaryFunctions) { const BinaryContext &BC = SrcFunction->getBinaryContext(); - for (BinaryBasicBlock *BB : SrcFunction->layout()) { + for (BinaryBasicBlock *BB : SrcFunction->getLayout().blocks()) { // Find call instructions and extract target symbols from each one for (MCInst &Inst : *BB) { if (!BC.MIB->isCall(Inst)) @@ -132,7 +132,7 @@ const BinaryFunction *DstFunction = BC.getFunctionForSymbol(DstSym); // Ignore recursive calls - if (DstFunction == nullptr || DstFunction->layout_empty() || + if (DstFunction == nullptr || DstFunction->getLayout().block_empty() || DstFunction == SrcFunction) continue; @@ -180,9 +180,9 @@ // Compute 'hotness' of the pages std::unordered_map PageSamples; for (BinaryFunction *BF : BinaryFunctions) { - if (BF->layout_empty()) + if (BF->getLayout().block_empty()) continue; - double Page = BBAddr.at(BF->layout_front()) / PageSize; + double Page = BBAddr.at(BF->getLayout().block_front()) / PageSize; PageSamples[Page] += FunctionSamples.at(BF); } @@ -190,17 +190,18 @@ double Misses = 0; for (BinaryFunction *BF : BinaryFunctions) { // Skip the function if it has no samples - if (BF->layout_empty() || FunctionSamples.at(BF) == 0.0) + if (BF->getLayout().block_empty() || FunctionSamples.at(BF) == 0.0) continue; double Samples = FunctionSamples.at(BF); - double Page = BBAddr.at(BF->layout_front()) / PageSize; + double Page = BBAddr.at(BF->getLayout().block_front()) / PageSize; // The probability that the page is not present in the cache double MissProb = pow(1.0 - PageSamples[Page] / TotalSamples, CacheEntries); // Processing all callers of the function for (std::pair Pair : Calls[BF]) { BinaryFunction *SrcFunction = Pair.first; - double SrcPage = BBAddr.at(SrcFunction->layout_front()) / PageSize; + double SrcPage = + BBAddr.at(SrcFunction->getLayout().block_front()) / PageSize; // Is this a 'long' or a 'short' call? if (Page != SrcPage) { // This is a miss diff --git a/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp b/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp --- a/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp +++ b/bolt/lib/Passes/ExtTSPReorderAlgorithm.cpp @@ -467,9 +467,9 @@ Emitter = BF.getBinaryContext().createIndependentMCCodeEmitter(); // Initialize CFG nodes - AllBlocks.reserve(BF.layout_size()); + AllBlocks.reserve(BF.getLayout().block_size()); size_t LayoutIndex = 0; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { BB->setLayoutIndex(LayoutIndex++); uint64_t Size = std::max(BB->estimateSize(Emitter.MCE.get()), 1); @@ -508,8 +508,8 @@ } // Initialize chains - AllChains.reserve(BF.layout_size()); - HotChains.reserve(BF.layout_size()); + AllChains.reserve(BF.getLayout().block_size()); + HotChains.reserve(BF.getLayout().block_size()); for (Block &Block : AllBlocks) { AllChains.emplace_back(Block.Index, &Block); Block.CurChain = &AllChains.back(); @@ -646,7 +646,7 @@ /// consideration. This allows to maintain the original block order in the /// absense of profile data void mergeColdChains() { - for (BinaryBasicBlock *SrcBB : BF.layout()) { + for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) { // Iterating in reverse order to make sure original fallthrough jumps are // merged first; this might be beneficial for code size. for (auto Itr = SrcBB->succ_rbegin(); Itr != SrcBB->succ_rend(); ++Itr) { @@ -841,7 +841,7 @@ }); // Collect the basic blocks in the order specified by their chains - Order.reserve(BF.layout_size()); + Order.reserve(BF.getLayout().block_size()); for (Chain *Chain : SortedChains) for (Block *Block : Chain->blocks()) Order.push_back(Block->BB); @@ -866,12 +866,12 @@ void ExtTSPReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; // Do not change layout of functions w/o profile information - if (!BF.hasValidProfile() || BF.layout_size() <= 2) { - for (BinaryBasicBlock *BB : BF.layout()) + if (!BF.hasValidProfile() || BF.getLayout().block_size() <= 2) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) Order.push_back(BB); return; } @@ -881,7 +881,8 @@ // Verify correctness assert(Order[0]->isEntryPoint() && "Original entry point is not preserved"); - assert(Order.size() == BF.layout_size() && "Wrong size of reordered layout"); + assert(Order.size() == BF.getLayout().block_size() && + "Wrong size of reordered layout"); } } // namespace bolt diff --git a/bolt/lib/Passes/FrameAnalysis.cpp b/bolt/lib/Passes/FrameAnalysis.cpp --- a/bolt/lib/Passes/FrameAnalysis.cpp +++ b/bolt/lib/Passes/FrameAnalysis.cpp @@ -413,7 +413,7 @@ bool NoInfo = false; FrameAccessAnalysis FAA(BF, getSPT(BF)); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { FAA.enterNewBB(); for (MCInst &Inst : *BB) { @@ -474,7 +474,7 @@ LLVM_DEBUG(dbgs() << "Restoring frame indices for \"" << BF.getPrintName() << "\"\n"); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { LLVM_DEBUG(dbgs() << "\tNow at BB " << BB->getName() << "\n"); FAA.enterNewBB(); diff --git a/bolt/lib/Passes/IdenticalCodeFolding.cpp b/bolt/lib/Passes/IdenticalCodeFolding.cpp --- a/bolt/lib/Passes/IdenticalCodeFolding.cpp +++ b/bolt/lib/Passes/IdenticalCodeFolding.cpp @@ -155,7 +155,7 @@ // instruction sequences and the same index in their corresponding // functions. The latter is important for CFG equality. - if (A.layout_size() != B.layout_size()) + if (A.getLayout().block_size() != B.getLayout().block_size()) return false; // Comparing multi-entry functions could be non-trivial. @@ -163,10 +163,16 @@ return false; // Process both functions in either DFS or existing order. - const BinaryFunction::BasicBlockOrderType &OrderA = - opts::UseDFS ? A.dfs() : A.getLayout(); - const BinaryFunction::BasicBlockOrderType &OrderB = - opts::UseDFS ? B.dfs() : B.getLayout(); + const BinaryFunction::BasicBlockOrderType OrderA = + opts::UseDFS + ? A.dfs() + : BinaryFunction::BasicBlockOrderType(A.getLayout().block_begin(), + A.getLayout().block_end()); + const BinaryFunction::BasicBlockOrderType OrderB = + opts::UseDFS + ? B.dfs() + : BinaryFunction::BasicBlockOrderType(B.getLayout().block_begin(), + B.getLayout().block_end()); const BinaryContext &BC = A.getBinaryContext(); @@ -415,7 +421,7 @@ "ICF breakdown", opts::TimeICF); ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { // Make sure indices are in-order. - BF.updateLayoutIndices(); + BF.getLayout().updateLayoutIndices(); // Pre-compute hash before pushing into hashtable. // Hash instruction operands to minimize hash collisions. diff --git a/bolt/lib/Passes/IndirectCallPromotion.cpp b/bolt/lib/Passes/IndirectCallPromotion.cpp --- a/bolt/lib/Passes/IndirectCallPromotion.cpp +++ b/bolt/lib/Passes/IndirectCallPromotion.cpp @@ -1162,7 +1162,7 @@ !Function.hasProfile()) continue; - const bool HasLayout = !Function.layout_empty(); + const bool HasLayout = !Function.getLayout().block_empty(); for (BinaryBasicBlock &BB : Function) { if (HasLayout && Function.isSplit() && BB.isCold()) @@ -1222,7 +1222,7 @@ if (!Function.isSimple() || Function.isIgnored() || !Function.hasProfile()) continue; - const bool HasLayout = !Function.layout_empty(); + const bool HasLayout = !Function.getLayout().block_empty(); // Total number of indirect calls issued from the current Function. // (a fraction of TotalIndirectCalls) diff --git a/bolt/lib/Passes/Inliner.cpp b/bolt/lib/Passes/Inliner.cpp --- a/bolt/lib/Passes/Inliner.cpp +++ b/bolt/lib/Passes/Inliner.cpp @@ -395,8 +395,8 @@ bool Inliner::inlineCallsInFunction(BinaryFunction &Function) { BinaryContext &BC = Function.getBinaryContext(); - std::vector Blocks(Function.layout().begin(), - Function.layout().end()); + std::vector Blocks(Function.getLayout().block_begin(), + Function.getLayout().block_end()); llvm::sort( Blocks, [](const BinaryBasicBlock *BB1, const BinaryBasicBlock *BB2) { return BB1->getKnownExecutionCount() > BB2->getKnownExecutionCount(); diff --git a/bolt/lib/Passes/Instrumentation.cpp b/bolt/lib/Passes/Instrumentation.cpp --- a/bolt/lib/Passes/Instrumentation.cpp +++ b/bolt/lib/Passes/Instrumentation.cpp @@ -322,8 +322,8 @@ std::unordered_map> STOutSet; - for (auto BBI = Function.layout_rbegin(); BBI != Function.layout_rend(); - ++BBI) { + for (auto BBI = Function.getLayout().block_rbegin(); + BBI != Function.getLayout().block_rend(); ++BBI) { if ((*BBI)->isEntryPoint() || (*BBI)->isLandingPad()) { Stack.push(std::make_pair(nullptr, *BBI)); if (opts::InstrumentCalls && (*BBI)->isEntryPoint()) { diff --git a/bolt/lib/Passes/LongJmp.cpp b/bolt/lib/Passes/LongJmp.cpp --- a/bolt/lib/Passes/LongJmp.cpp +++ b/bolt/lib/Passes/LongJmp.cpp @@ -55,7 +55,9 @@ return nullptr; assert(!(*Func.begin()).isCold() && "Entry cannot be cold"); - for (auto I = Func.layout_begin(), E = Func.layout_end(); I != E; ++I) { + for (auto I = Func.getLayout().block_begin(), + E = Func.getLayout().block_end(); + I != E; ++I) { auto Next = std::next(I); if (Next != E && (*Next)->isCold()) return *I; @@ -272,7 +274,7 @@ uint64_t HotDot = HotAddresses[&Func]; uint64_t ColdDot = ColdAddresses[&Func]; bool Cold = false; - for (BinaryBasicBlock *BB : Func.layout()) { + for (const BinaryBasicBlock *BB : Func.getLayout().blocks()) { if (Cold || BB->isCold()) { Cold = true; BBAddresses[BB] = ColdDot; diff --git a/bolt/lib/Passes/LoopInversionPass.cpp b/bolt/lib/Passes/LoopInversionPass.cpp --- a/bolt/lib/Passes/LoopInversionPass.cpp +++ b/bolt/lib/Passes/LoopInversionPass.cpp @@ -31,11 +31,11 @@ bool LoopInversionPass::runOnFunction(BinaryFunction &BF) { bool IsChanged = false; - if (BF.layout_size() < 3 || !BF.hasValidProfile()) + if (BF.getLayout().block_size() < 3 || !BF.hasValidProfile()) return false; - BF.updateLayoutIndices(); - for (BinaryBasicBlock *BB : BF.layout()) { + BF.getLayout().updateLayoutIndices(); + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->succ_size() != 1 || BB->pred_size() != 1) continue; @@ -72,11 +72,12 @@ } if (IsChanged) { - BinaryFunction::BasicBlockOrderType NewOrder = BF.getLayout(); + BinaryFunction::BasicBlockOrderType NewOrder(BF.getLayout().block_begin(), + BF.getLayout().block_end()); llvm::sort(NewOrder, [&](BinaryBasicBlock *BB1, BinaryBasicBlock *BB2) { return BB1->getLayoutIndex() < BB2->getLayoutIndex(); }); - BF.updateBasicBlockLayout(NewOrder); + BF.getLayout().update(NewOrder); } return IsChanged; diff --git a/bolt/lib/Passes/ReorderAlgorithm.cpp b/bolt/lib/Passes/ReorderAlgorithm.cpp --- a/bolt/lib/Passes/ReorderAlgorithm.cpp +++ b/bolt/lib/Passes/ReorderAlgorithm.cpp @@ -136,10 +136,10 @@ // Initialize inter-cluster weights. if (ComputeEdges) - ClusterEdges.resize(BF.layout_size()); + ClusterEdges.resize(BF.getLayout().block_size()); // Initialize clusters and edge queue. - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { // Create a cluster for this BB. uint32_t I = Clusters.size(); Clusters.emplace_back(); @@ -169,7 +169,7 @@ LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n"); // Case 1: BBSrc and BBDst are the same. Ignore this edge - if (SrcBB == DstBB || DstBB == *BF.layout_begin()) { + if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) { LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n"); continue; } @@ -276,7 +276,8 @@ "overflow detected"); // Ignore edges with same source and destination, edges that target the // entry block as well as the edge E itself. - if (SuccBB != SrcBB && SuccBB != *BF.layout_begin() && SuccBB != DstBB) + if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() && + SuccBB != DstBB) W -= (int64_t)BI->Count; ++BI; } @@ -340,7 +341,7 @@ // Case 1: SrcBB and DstBB are the same or DstBB is the entry block. Ignore // this edge. - if (SrcBB == DstBB || DstBB == *BF.layout_begin()) { + if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) { LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs()); dbgs() << " (same src, dst)\n"); continue; @@ -403,17 +404,17 @@ std::vector> Weight; std::vector IndexToBB; - const size_t N = BF.layout_size(); + const size_t N = BF.getLayout().block_size(); assert(N <= std::numeric_limits::digits && "cannot use TSP solution for sizes larger than bits in uint64_t"); // Populating weight map and index map - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { BB->setLayoutIndex(IndexToBB.size()); IndexToBB.push_back(BB); } Weight.resize(N); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { auto BI = BB->branch_info_begin(); Weight[BB->getLayoutIndex()].resize(N); for (BinaryBasicBlock *SuccBB : BB->successors()) { @@ -495,14 +496,14 @@ // Finalize layout with BBs that weren't assigned to the layout using the // input layout. - for (BinaryBasicBlock *BB : BF.layout()) + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) if (Visited[BB->getLayoutIndex()] == false) Order.push_back(BB); } void OptimizeReorderAlgorithm::reorderBasicBlocks( const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; // Cluster basic blocks. @@ -518,7 +519,7 @@ void OptimizeBranchReorderAlgorithm::reorderBasicBlocks( const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; // Cluster basic blocks. @@ -623,11 +624,12 @@ void OptimizeCacheReorderAlgorithm::reorderBasicBlocks( const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; const uint64_t ColdThreshold = - opts::ColdThreshold * (*BF.layout_begin())->getExecutionCount() / 1000; + opts::ColdThreshold * + (*BF.getLayout().block_begin())->getExecutionCount() / 1000; // Cluster basic blocks. CAlgo->clusterBasicBlocks(BF); @@ -676,18 +678,18 @@ void ReverseReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; - BinaryBasicBlock *FirstBB = *BF.layout_begin(); + BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin(); Order.push_back(FirstBB); - for (auto RLI = BF.layout_rbegin(); *RLI != FirstBB; ++RLI) + for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI) Order.push_back(*RLI); } void RandomClusterReorderAlgorithm::reorderBasicBlocks( const BinaryFunction &BF, BasicBlockOrder &Order) const { - if (BF.layout_empty()) + if (BF.getLayout().block_empty()) return; // Cluster basic blocks. diff --git a/bolt/lib/Passes/ShrinkWrapping.cpp b/bolt/lib/Passes/ShrinkWrapping.cpp --- a/bolt/lib/Passes/ShrinkWrapping.cpp +++ b/bolt/lib/Passes/ShrinkWrapping.cpp @@ -379,7 +379,7 @@ } }; - for (BinaryBasicBlock *&BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { for (MCInst &Inst : *BB) { if (!BC.MIB->isCFI(Inst)) continue; @@ -1021,7 +1021,7 @@ // (PredictiveStackPointerTracking). Detect now for empty BBs and add a // dummy nop that is scheduled to be removed later. bool InvalidateRequired = false; - for (BinaryBasicBlock *&BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->size() != 0) continue; MCInst NewInst; @@ -1149,7 +1149,7 @@ void ShrinkWrapping::scheduleOldSaveRestoresRemoval(unsigned CSR, bool UsePushPops) { - for (BinaryBasicBlock *&BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { std::vector CFIs; for (auto I = BB->rbegin(), E = BB->rend(); I != E; ++I) { MCInst &Inst = *I; @@ -1574,7 +1574,7 @@ bool PrevAffectedZone = false; BinaryBasicBlock *PrevBB = nullptr; DominatorAnalysis &DA = Info.getDominatorAnalysis(); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->size() == 0) continue; const bool InAffectedZoneAtEnd = DA.count(*BB->rbegin(), *SavePoint); @@ -1625,7 +1625,7 @@ int PrevSPVal = -8; BinaryBasicBlock *PrevBB = nullptr; StackPointerTracking &SPT = Info.getStackPointerTracking(); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->size() == 0) continue; const int SPValAtEnd = SPT.getStateAt(*BB->rbegin())->first; @@ -1973,7 +1973,7 @@ // Update pass statistics uint64_t TotalInstrs = 0ULL; uint64_t TotalStoreInstrs = 0ULL; - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { uint64_t BBExecCount = BB->getExecutionCount(); if (!BBExecCount || BBExecCount == BinaryBasicBlock::COUNT_NO_PROFILE) continue; diff --git a/bolt/lib/Passes/SplitFunctions.cpp b/bolt/lib/Passes/SplitFunctions.cpp --- a/bolt/lib/Passes/SplitFunctions.cpp +++ b/bolt/lib/Passes/SplitFunctions.cpp @@ -105,8 +105,7 @@ return BB.getExecutionCount() == 0; } - void partition(BinaryFunction::reverse_order_iterator Start, - BinaryFunction::reverse_order_iterator End) const { + template void partition(const It Start, const It End) const { for (auto I = Start; I != End; ++I) { BinaryBasicBlock *BB = *I; if (!BB->canOutline()) @@ -124,24 +123,22 @@ bool canSplit(const BinaryFunction &BF) { return true; } bool canOutline(const BinaryBasicBlock &BB) { return true; } - void partition(BinaryFunction::reverse_order_iterator Start, - BinaryFunction::reverse_order_iterator End) const { - using It = decltype(Start); + template void partition(It Start, It End) const { + using DiffT = typename It::difference_type; const It OutlineableBegin = Start; const It OutlineableEnd = - std::find_if(OutlineableBegin, End, - [](BinaryBasicBlock *BB) { return !BB->canOutline(); }); - const It::difference_type NumOutlineableBlocks = - OutlineableEnd - OutlineableBegin; + std::find_if(OutlineableBegin, End, [](const BinaryBasicBlock *BB) { + return !BB->canOutline(); + }); + const DiffT NumOutlineableBlocks = OutlineableEnd - OutlineableBegin; // We want to split at least one block unless there are not blocks that can // be outlined - const auto MinimumSplit = - std::min(NumOutlineableBlocks, 1); - std::uniform_int_distribution Dist( - MinimumSplit, NumOutlineableBlocks); - const It::difference_type NumColdBlocks = Dist(*Gen); + const auto MinimumSplit = std::min(NumOutlineableBlocks, 1); + std::uniform_int_distribution Dist(MinimumSplit, + NumOutlineableBlocks); + const DiffT NumColdBlocks = Dist(*Gen); const It ColdEnd = OutlineableBegin + NumColdBlocks; LLVM_DEBUG(dbgs() << formatv("BOLT-DEBUG: randomly chose last {0} (out of " @@ -206,7 +203,9 @@ if (!Strategy.canSplit(BF)) return; - BinaryFunction::BasicBlockOrderType PreSplitLayout = BF.getLayout(); + FunctionLayout &Layout = BF.getLayout(); + BinaryFunction::BasicBlockOrderType PreSplitLayout(Layout.block_begin(), + Layout.block_end()); BinaryContext &BC = BF.getBinaryContext(); size_t OriginalHotSize; @@ -220,9 +219,11 @@ << Twine::utohexstr(ColdSize) << ">\n"); } + BinaryFunction::BasicBlockOrderType NewLayout(Layout.block_begin(), + Layout.block_end()); // Never outline the first basic block. - BF.layout_front()->setCanOutline(false); - for (BinaryBasicBlock *BB : BF.layout()) { + NewLayout.front()->setCanOutline(false); + for (BinaryBasicBlock *const BB : NewLayout) { if (!BB->canOutline()) continue; if (!Strategy.canOutline(*BB)) { @@ -260,26 +261,26 @@ // All blocks with 0 count that we can move go to the end of the function. // Even if they were natural to cluster formation and were seen in-between // hot basic blocks. - llvm::stable_sort(BF.layout(), - [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { - return A->canOutline() < B->canOutline(); - }); + stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { + return A->canOutline() < B->canOutline(); + }); } else if (BF.hasEHRanges() && !opts::SplitEH) { // Typically functions with exception handling have landing pads at the end. // We cannot move beginning of landing pads, but we can move 0-count blocks // comprising landing pads to the end and thus facilitate splitting. - auto FirstLP = BF.layout_begin(); + auto FirstLP = NewLayout.begin(); while ((*FirstLP)->isLandingPad()) ++FirstLP; - std::stable_sort(FirstLP, BF.layout_end(), + std::stable_sort(FirstLP, NewLayout.end(), [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { return A->canOutline() < B->canOutline(); }); } // Separate hot from cold starting from the bottom. - Strategy.partition(BF.layout_rbegin(), BF.layout_rend()); + Strategy.partition(NewLayout.rbegin(), NewLayout.rend()); + BF.getLayout().update(NewLayout); // For shared objects, invoke instructions and corresponding landing pads // have to be placed in the same fragment. When we split them, create @@ -307,9 +308,9 @@ if (PreSplitLayout.size() != BF.size()) PreSplitLayout = mergeEHTrampolines(BF, PreSplitLayout, Trampolines); - BF.updateBasicBlockLayout(PreSplitLayout); for (BinaryBasicBlock &BB : BF) BB.setIsCold(false); + BF.getLayout().update(PreSplitLayout); } else { SplitBytesHot += HotSize; SplitBytesCold += ColdSize; @@ -366,10 +367,12 @@ // All trampoline blocks were added to the end of the function. Place them at // the end of corresponding fragments. - std::stable_sort(BF.layout_begin(), BF.layout_end(), - [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { - return A->isCold() < B->isCold(); - }); + BinaryFunction::BasicBlockOrderType NewLayout(BF.getLayout().block_begin(), + BF.getLayout().block_end()); + stable_sort(NewLayout, [&](BinaryBasicBlock *A, BinaryBasicBlock *B) { + return A->isCold() < B->isCold(); + }); + BF.getLayout().update(NewLayout); // Conservatively introduce branch instructions. BF.fixBranches(); diff --git a/bolt/lib/Passes/StokeInfo.cpp b/bolt/lib/Passes/StokeInfo.cpp --- a/bolt/lib/Passes/StokeInfo.cpp +++ b/bolt/lib/Passes/StokeInfo.cpp @@ -46,7 +46,7 @@ void StokeInfo::checkInstr(const BinaryFunction &BF, StokeFuncInfo &FuncInfo) { MCPlusBuilder *MIB = BF.getBinaryContext().MIB.get(); BitVector RegV(NumRegs, false); - for (BinaryBasicBlock *BB : BF.layout()) { + for (BinaryBasicBlock *BB : BF.getLayout().blocks()) { if (BB->empty()) continue; @@ -81,7 +81,7 @@ if (IsRipAddr) FuncInfo.HasRipAddr = true; } // end of for (auto &It : ...) - } // end of for (auto *BB : ...) + } // end of for (auto *BB : ...) } bool StokeInfo::checkFunction(BinaryFunction &BF, DataflowInfoManager &DInfo, diff --git a/bolt/lib/Passes/TailDuplication.cpp b/bolt/lib/Passes/TailDuplication.cpp --- a/bolt/lib/Passes/TailDuplication.cpp +++ b/bolt/lib/Passes/TailDuplication.cpp @@ -256,14 +256,12 @@ if (&BB == &Succ) return true; - BinaryFunction::BasicBlockOrderType BlockLayout = - BB.getFunction()->getLayout(); uint64_t Distance = 0; int Direction = (Succ.getLayoutIndex() > BB.getLayoutIndex()) ? 1 : -1; for (unsigned I = BB.getLayoutIndex() + Direction; I != Succ.getLayoutIndex(); I += Direction) { - Distance += BlockLayout[I]->getOriginalSize(); + Distance += BB.getFunction()->getLayout().getBlock(I)->getOriginalSize(); if (Distance > opts::TailDuplicationMinimumOffset) return false; } @@ -410,15 +408,15 @@ BinaryBasicBlock *Pred, BinaryBasicBlock *Tail) const { // Collect (estimated) basic block sizes - DenseMap BBSize; - for (BinaryBasicBlock *BB : BF.layout()) { - BBSize[BB] = std::max(BB->estimateSize(Emitter), 1); + DenseMap BBSize; + for (const BinaryBasicBlock &BB : BF) { + BBSize[&BB] = std::max(BB.estimateSize(Emitter), 1); } // Build current addresses of basic blocks starting at the entry block DenseMap CurAddr; uint64_t Addr = 0; - for (BinaryBasicBlock *SrcBB : BF.layout()) { + for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) { CurAddr[SrcBB] = Addr; Addr += BBSize[SrcBB]; } @@ -426,7 +424,7 @@ // Build new addresses (after duplication) starting at the entry block DenseMap NewAddr; Addr = 0; - for (BinaryBasicBlock *SrcBB : BF.layout()) { + for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) { NewAddr[SrcBB] = Addr; Addr += BBSize[SrcBB]; if (SrcBB == Pred) @@ -435,7 +433,7 @@ // Compute the cache score for the existing layout of basic blocks double CurScore = 0; - for (BinaryBasicBlock *SrcBB : BF.layout()) { + for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) { auto BI = SrcBB->branch_info_begin(); for (BinaryBasicBlock *DstBB : SrcBB->successors()) { if (SrcBB != DstBB) { @@ -448,7 +446,7 @@ // Compute the cache score for the layout of blocks after tail duplication double NewScore = 0; - for (BinaryBasicBlock *SrcBB : BF.layout()) { + for (BinaryBasicBlock *SrcBB : BF.getLayout().blocks()) { auto BI = SrcBB->branch_info_begin(); for (BinaryBasicBlock *DstBB : SrcBB->successors()) { if (SrcBB != DstBB) { @@ -489,7 +487,7 @@ return BlocksToDuplicate; } // Do not append basic blocks after the last hot block in the current layout - auto NextBlock = BF.getBasicBlockAfter(Pred); + auto NextBlock = BF.getLayout().getBasicBlockAfter(Pred); if (NextBlock == nullptr || (!Pred->isCold() && NextBlock->isCold())) { return BlocksToDuplicate; } @@ -576,11 +574,12 @@ Emitter = Function.getBinaryContext().createIndependentMCCodeEmitter(); } - Function.updateLayoutIndices(); + Function.getLayout().updateLayoutIndices(); // New blocks will be added and layout will change, // so make a copy here to iterate over the original layout - BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout(); + BinaryFunction::BasicBlockOrderType BlockLayout( + Function.getLayout().block_begin(), Function.getLayout().block_end()); bool ModifiedFunction = false; for (BinaryBasicBlock *BB : BlockLayout) { AllDynamicCount += BB->getKnownExecutionCount(); @@ -627,7 +626,7 @@ } // Layout indices might be stale after duplication - Function.updateLayoutIndices(); + Function.getLayout().updateLayoutIndices(); } if (ModifiedFunction) ModifiedFunctions++; diff --git a/bolt/lib/Passes/ThreeWayBranch.cpp b/bolt/lib/Passes/ThreeWayBranch.cpp --- a/bolt/lib/Passes/ThreeWayBranch.cpp +++ b/bolt/lib/Passes/ThreeWayBranch.cpp @@ -31,7 +31,8 @@ MCContext *Ctx = BC.Ctx.get(); // New blocks will be added and layout will change, // so make a copy here to iterate over the original layout - BinaryFunction::BasicBlockOrderType BlockLayout = Function.getLayout(); + BinaryFunction::BasicBlockOrderType BlockLayout( + Function.getLayout().block_begin(), Function.getLayout().block_end()); for (BinaryBasicBlock *BB : BlockLayout) { // The block must be hot if (BB->getExecutionCount() == 0 || diff --git a/bolt/lib/Passes/ValidateInternalCalls.cpp b/bolt/lib/Passes/ValidateInternalCalls.cpp --- a/bolt/lib/Passes/ValidateInternalCalls.cpp +++ b/bolt/lib/Passes/ValidateInternalCalls.cpp @@ -161,7 +161,7 @@ // Connect this block with the returning block of the caller BinaryBasicBlock *CallerBlock = Info.getInsnToBBMap()[&ReachingInst]; BinaryBasicBlock *ReturnDestBlock = - Function.getBasicBlockAfter(CallerBlock); + Function.getLayout().getBasicBlockAfter(CallerBlock); BB.addSuccessor(ReturnDestBlock, BB.getExecutionCount(), 0); } }; diff --git a/bolt/lib/Profile/BoltAddressTranslation.cpp b/bolt/lib/Profile/BoltAddressTranslation.cpp --- a/bolt/lib/Profile/BoltAddressTranslation.cpp +++ b/bolt/lib/Profile/BoltAddressTranslation.cpp @@ -70,7 +70,7 @@ << Twine::utohexstr(Function.getOutputAddress()) << "\n"); MapTy Map; const bool IsSplit = Function.isSplit(); - for (BinaryBasicBlock *&BB : Function.layout()) { + for (const BinaryBasicBlock *BB : Function.getLayout().blocks()) { if (IsSplit && BB->isCold()) break; writeEntriesForBB(Map, *BB, Function.getOutputAddress()); @@ -83,7 +83,7 @@ // Cold map Map.clear(); LLVM_DEBUG(dbgs() << " Cold part\n"); - for (BinaryBasicBlock *&BB : Function.layout()) { + for (const BinaryBasicBlock *BB : Function.getLayout().blocks()) { if (!BB->isCold()) continue; writeEntriesForBB(Map, *BB, Function.cold().getAddress()); diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp --- a/bolt/lib/Profile/DataAggregator.cpp +++ b/bolt/lib/Profile/DataAggregator.cpp @@ -877,7 +877,7 @@ // the previous block (that instruction should be a call). if (From == FromBB->getOffset() && !BF.containsAddress(FirstLBR.From) && !FromBB->isEntryPoint() && !FromBB->isLandingPad()) { - BinaryBasicBlock *PrevBB = BF.BasicBlocksLayout[FromBB->getIndex() - 1]; + BinaryBasicBlock *PrevBB = BF.getLayout().getBlock(FromBB->getIndex() - 1); if (PrevBB->getSuccessor(FromBB->getLabel())) { const MCInst *Instr = PrevBB->getLastNonPseudoInstr(); if (Instr && BC.MIB->isCall(*Instr)) @@ -897,10 +897,10 @@ return true; // Process blocks in the original layout order. - BinaryBasicBlock *BB = BF.BasicBlocksLayout[FromBB->getIndex()]; + BinaryBasicBlock *BB = BF.getLayout().getBlock(FromBB->getIndex()); assert(BB == FromBB && "index mismatch"); while (BB != ToBB) { - BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[BB->getIndex() + 1]; + BinaryBasicBlock *NextBB = BF.getLayout().getBlock(BB->getIndex() + 1); assert((NextBB && NextBB->getOffset() > BB->getOffset()) && "bad layout"); // Check for bad LBRs. diff --git a/bolt/lib/Profile/DataReader.cpp b/bolt/lib/Profile/DataReader.cpp --- a/bolt/lib/Profile/DataReader.cpp +++ b/bolt/lib/Profile/DataReader.cpp @@ -737,9 +737,9 @@ } // The real destination is the layout successor of the detected ToBB. - if (ToBB == BF.BasicBlocksLayout.back()) + if (ToBB == BF.getLayout().block_back()) return false; - BinaryBasicBlock *NextBB = BF.BasicBlocksLayout[ToBB->getIndex() + 1]; + BinaryBasicBlock *NextBB = BF.getLayout().getBlock(ToBB->getIndex() + 1); assert((NextBB && NextBB->getOffset() > ToBB->getOffset()) && "bad layout"); ToBB = NextBB; } diff --git a/bolt/lib/Rewrite/BoltDiff.cpp b/bolt/lib/Rewrite/BoltDiff.cpp --- a/bolt/lib/Rewrite/BoltDiff.cpp +++ b/bolt/lib/Rewrite/BoltDiff.cpp @@ -344,14 +344,14 @@ const BinaryFunction *const &Func1 = MapEntry.second; const BinaryFunction *const &Func2 = MapEntry.first; - auto Iter1 = Func1->layout_begin(); - auto Iter2 = Func2->layout_begin(); + auto Iter1 = Func1->getLayout().block_begin(); + auto Iter2 = Func2->getLayout().block_begin(); bool Match = true; std::map Map; std::map> EMap; - while (Iter1 != Func1->layout_end()) { - if (Iter2 == Func2->layout_end()) { + while (Iter1 != Func1->getLayout().block_end()) { + if (Iter2 == Func2->getLayout().block_end()) { Match = false; break; } @@ -393,7 +393,7 @@ ++Iter1; ++Iter2; } - if (!Match || Iter2 != Func2->layout_end()) + if (!Match || Iter2 != Func2->getLayout().block_end()) continue; BBMap.insert(Map.begin(), Map.end());