Index: include/llvm/Transforms/Utils/Outliner.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/Outliner.h @@ -0,0 +1,175 @@ +//===- Outliner.h - A generic outlining utility interface around the Utils lib +//------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file includes utilities for defining outliner functionality. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_UTILS_OUTLINER_H +#define LLVM_TRANSFORMS_UTILS_OUTLINER_H + +#include "llvm/ADT/ArrayRef.h" +#include + +namespace llvm { +class BitVector; + +/// \brief A potential candidate to be outlined. +struct OutlineCandidate { + struct AdditionalData {}; + /// The amount of instructions being saved. + unsigned Len; + + /// The computed benefit of outlining this candidate. + unsigned Benefit = 0; + + /// The estimated benefit we receive per occurrence. + unsigned BenefitPerOccur = 0; + + /// Identifier for each occurrence. + std::vector Occurrences; + + /// Instruction size that this candidate shares with the next. + unsigned SharedSizeWithNext = 0; + + /// Additional info attached to this candidate. + AdditionalData *Data = nullptr; + + // Accessors. + using Iterator = std::vector::iterator; + Iterator begin() { return Occurrences.begin(); } + Iterator end() { return Occurrences.end(); } + size_t size() const { return Occurrences.size(); } + + // Check to see if this chain is still profitable to outline. + bool isValid() const { return Benefit != 0; } + // Set this candidate as not profitable. + void invalidate() { Benefit = 0; } + // Get the candidate at index /p Idx. + unsigned getOccurrence(size_t Idx) const { + assert(Idx < size() && "Invalid occurrence index."); + return Occurrences[Idx]; + } + // Remove the occurrence at index /p Idx + void removeOccurrence(size_t Idx) { + Occurrences[Idx] = Occurrences.back(); + Occurrences.pop_back(); + } + // Get the additional data attached. + template T &getData() { + assert(Data && "Getting an invalid data pointer."); + return *static_cast(Data); + } + + OutlineCandidate(unsigned Len, std::vector &Occurrences) + : Len(Len), Occurrences(Occurrences) {} + OutlineCandidate(unsigned Len) : Len(Len) {} +}; + +/// \brief The base interface for outliner functionality. +class OutlinerImpl { +public: + virtual ~OutlinerImpl() {} + bool run(); + +protected: + // Main interface functions. + virtual void findOutliningOccurrences(std::vector &) = 0; + virtual void analyzeCandidateList(std::vector &) = 0; + virtual bool pruneAndOutline(std::vector &) = 0; +}; + +/// \brief Outliner impl for outlining sequences. +class SequencedOutlinerImpl : public OutlinerImpl { +public: + /// \brief A base mapper that is used to map instructions to a congruency + /// vector. + class OutlinerMapper { + public: + friend class SequencedOutlinerImpl; + virtual ~OutlinerMapper() {} + + // Get the unsigned vector representation of the congruencies for the + // module. + ArrayRef getCongruencyVector() const { return CCVec; } + // Get the number of different congruency classes found in the module. + unsigned getNumCongruencyClasses() const { return CCID; } + // Get the number of instructions mapped in this module. + virtual unsigned getNumMappedInstructions() const = 0; + + protected: + /// Internal vector representation of the instructions within the mapped + /// module. + std::vector CCVec; + + /// Current Congruency ID. + unsigned CCID = 1; + + /// An id for illegal instructions. + unsigned IllegalID = UINT_MAX; + + private: + // Helper utility to finalize the congruency vector for use in candidate + // selection. + void prepareForCandidateSelection(); + }; + + // The method that the impl uses to find outlining candidates. + enum CandidateSelectionMethod { + CSM_SuffixTree, + CSM_SuffixArray, + CSM_None + }; + + SequencedOutlinerImpl( + OutlinerMapper &OM, unsigned MinInstructionLen, unsigned MinOccurrences, + CandidateSelectionMethod CSM, + function_ref + OutlineFn) + : OM(OM), MinInstructionLen(MinInstructionLen), + MinOccurrences(MinOccurrences), CSM(CSM), OutlineFn(OutlineFn) {} + virtual ~SequencedOutlinerImpl() {} + +protected: + // Checks to see whether we should prune an abstract candidate. + virtual bool prePrune(ArrayRef, unsigned Size) { + return false; + }; + // Helper for getting the mapper as a parent type. + template + T &getMapperAs() { + return static_cast(OM); + } + + // A mapper that holds the state of the current module. + OutlinerMapper &OM; + + // Metric utilities + unsigned MinInstructionLen; + unsigned MinOccurrences; +private: + // Helpers for finding candidates in different ways. + CandidateSelectionMethod CSM; + + /// Helper function to outline a profitable candidate. + function_ref + OutlineFn; + + void findSTCandidates(std::vector &CL); + void findSACandidates(std::vector &CL); + + // Base overrides. + virtual void + findOutliningOccurrences(std::vector &) override; + virtual bool pruneAndOutline(std::vector &) override; +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_OUTLINER_H Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -41,25 +41,17 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRBuilder.h" -#include "llvm/Support/Allocator.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include -#include +#include "llvm/Transforms/Utils/Outliner.h" #include -#include -#include #define DEBUG_TYPE "machine-outliner" @@ -69,586 +61,58 @@ STATISTIC(FunctionsCreated, "Number of functions created"); namespace { - -/// \brief An individual sequence of instructions to be replaced with a call to -/// an outlined function. -struct Candidate { - - /// Set to false if the candidate overlapped with another candidate. - bool InCandidateList = true; - - /// The start index of this \p Candidate. - size_t StartIdx; - - /// The number of instructions in this \p Candidate. - size_t Len; - - /// The index of this \p Candidate's \p OutlinedFunction in the list of - /// \p OutlinedFunctions. - size_t FunctionIdx; - - /// \brief The number of instructions that would be saved by outlining every - /// candidate of this type. - /// - /// This is a fixed value which is not updated during the candidate pruning - /// process. It is only used for deciding which candidate to keep if two - /// candidates overlap. The true benefit is stored in the OutlinedFunction - /// for some given candidate. - unsigned Benefit = 0; - - Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx) - : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} - - Candidate() {} - - /// \brief Used to ensure that \p Candidates are outlined in an order that - /// preserves the start and end indices of other \p Candidates. - bool operator<(const Candidate &RHS) const { return StartIdx > RHS.StartIdx; } -}; - -/// \brief The information necessary to create an outlined function for some -/// class of candidate. -struct OutlinedFunction { - - /// The actual outlined function created. - /// This is initialized after we go through and create the actual function. - MachineFunction *MF = nullptr; - - /// A numbefr assigned to this function which appears at the end of its name. - size_t Name; - - /// The number of candidates for this OutlinedFunction. - size_t OccurrenceCount = 0; - - /// \brief The sequence of integers corresponding to the instructions in this - /// function. - std::vector Sequence; - - /// The number of instructions this function would save. - unsigned Benefit = 0; - - /// \brief Set to true if candidates for this outlined function should be - /// replaced with tail calls to this OutlinedFunction. - bool IsTailCall = false; - - OutlinedFunction(size_t Name, size_t OccurrenceCount, - const std::vector &Sequence, unsigned Benefit, - bool IsTailCall) - : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), - Benefit(Benefit), IsTailCall(IsTailCall) {} -}; - -/// Represents an undefined index in the suffix tree. -const size_t EmptyIdx = -1; - -/// A node in a suffix tree which represents a substring or suffix. -/// -/// Each node has either no children or at least two children, with the root -/// being a exception in the empty tree. -/// -/// Children are represented as a map between unsigned integers and nodes. If -/// a node N has a child M on unsigned integer k, then the mapping represented -/// by N is a proper prefix of the mapping represented by M. Note that this, -/// although similar to a trie is somewhat different: each node stores a full -/// substring of the full mapping rather than a single character state. -/// -/// Each internal node contains a pointer to the internal node representing -/// the same string, but with the first character chopped off. This is stored -/// in \p Link. Each leaf node stores the start index of its respective -/// suffix in \p SuffixIdx. -struct SuffixTreeNode { - - /// The children of this node. - /// - /// A child existing on an unsigned integer implies that from the mapping - /// represented by the current node, there is a way to reach another - /// mapping by tacking that character on the end of the current string. - DenseMap Children; - - /// A flag set to false if the node has been pruned from the tree. - bool IsInTree = true; - - /// The start index of this node's substring in the main string. - size_t StartIdx = EmptyIdx; - - /// The end index of this node's substring in the main string. - /// - /// Every leaf node must have its \p EndIdx incremented at the end of every - /// step in the construction algorithm. To avoid having to update O(N) - /// nodes individually at the end of every step, the end index is stored - /// as a pointer. - size_t *EndIdx = nullptr; - - /// For leaves, the start index of the suffix represented by this node. - /// - /// For all other nodes, this is ignored. - size_t SuffixIdx = EmptyIdx; - - /// \brief For internal nodes, a pointer to the internal node representing - /// the same sequence with the first character chopped off. - /// - /// This acts as a shortcut in Ukkonen's algorithm. One of the things that - /// Ukkonen's algorithm does to achieve linear-time construction is - /// keep track of which node the next insert should be at. This makes each - /// insert O(1), and there are a total of O(N) inserts. The suffix link - /// helps with inserting children of internal nodes. - /// - /// Say we add a child to an internal node with associated mapping S. The - /// next insertion must be at the node representing S - its first character. - /// This is given by the way that we iteratively build the tree in Ukkonen's - /// algorithm. The main idea is to look at the suffixes of each prefix in the - /// string, starting with the longest suffix of the prefix, and ending with - /// the shortest. Therefore, if we keep pointers between such nodes, we can - /// move to the next insertion point in O(1) time. If we don't, then we'd - /// have to query from the root, which takes O(N) time. This would make the - /// construction algorithm O(N^2) rather than O(N). - SuffixTreeNode *Link = nullptr; - - /// The parent of this node. Every node except for the root has a parent. - SuffixTreeNode *Parent = nullptr; - - /// The number of times this node's string appears in the tree. - /// - /// This is equal to the number of leaf children of the string. It represents - /// the number of suffixes that the node's string is a prefix of. - size_t OccurrenceCount = 0; - - /// The length of the string formed by concatenating the edge labels from the - /// root to this node. - size_t ConcatLen = 0; - - /// Returns true if this node is a leaf. - bool isLeaf() const { return SuffixIdx != EmptyIdx; } - - /// Returns true if this node is the root of its owning \p SuffixTree. - bool isRoot() const { return StartIdx == EmptyIdx; } - - /// Return the number of elements in the substring associated with this node. - size_t size() const { - - // Is it the root? If so, it's the empty string so return 0. - if (isRoot()) - return 0; - - assert(*EndIdx != EmptyIdx && "EndIdx is undefined!"); - - // Size = the number of elements in the string. - // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1. - return *EndIdx - StartIdx + 1; - } - - SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link, - SuffixTreeNode *Parent) - : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} - - SuffixTreeNode() {} -}; - -/// A data structure for fast substring queries. -/// -/// Suffix trees represent the suffixes of their input strings in their leaves. -/// A suffix tree is a type of compressed trie structure where each node -/// represents an entire substring rather than a single character. Each leaf -/// of the tree is a suffix. -/// -/// A suffix tree can be seen as a type of state machine where each state is a -/// substring of the full string. The tree is structured so that, for a string -/// of length N, there are exactly N leaves in the tree. This structure allows -/// us to quickly find repeated substrings of the input string. -/// -/// In this implementation, a "string" is a vector of unsigned integers. -/// These integers may result from hashing some data type. A suffix tree can -/// contain 1 or many strings, which can then be queried as one large string. -/// -/// The suffix tree is implemented using Ukkonen's algorithm for linear-time -/// suffix tree construction. Ukkonen's algorithm is explained in more detail -/// in the paper by Esko Ukkonen "On-line construction of suffix trees. The -/// paper is available at -/// -/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf -class SuffixTree { -public: - /// Stores each leaf node in the tree. - /// - /// This is used for finding outlining candidates. - std::vector LeafVector; - - /// Each element is an integer representing an instruction in the module. - ArrayRef Str; - -private: - /// Maintains each node in the tree. - SpecificBumpPtrAllocator NodeAllocator; - - /// The root of the suffix tree. - /// - /// The root represents the empty string. It is maintained by the - /// \p NodeAllocator like every other node in the tree. - SuffixTreeNode *Root = nullptr; - - /// Maintains the end indices of the internal nodes in the tree. - /// - /// Each internal node is guaranteed to never have its end index change - /// during the construction algorithm; however, leaves must be updated at - /// every step. Therefore, we need to store leaf end indices by reference - /// to avoid updating O(N) leaves at every step of construction. Thus, - /// every internal node must be allocated its own end index. - BumpPtrAllocator InternalEndIdxAllocator; - - /// The end index of each leaf in the tree. - size_t LeafEndIdx = -1; - - /// \brief Helper struct which keeps track of the next insertion point in - /// Ukkonen's algorithm. - struct ActiveState { - /// The next node to insert at. - SuffixTreeNode *Node; - - /// The index of the first character in the substring currently being added. - size_t Idx = EmptyIdx; - - /// The length of the substring we have to add at the current step. - size_t Len = 0; - }; - - /// \brief The point the next insertion will take place at in the - /// construction algorithm. - ActiveState Active; - - /// Allocate a leaf node and add it to the tree. - /// - /// \param Parent The parent of this node. - /// \param StartIdx The start index of this node's associated string. - /// \param Edge The label on the edge leaving \p Parent to this node. - /// - /// \returns A pointer to the allocated leaf node. - SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx, - unsigned Edge) { - - assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); - - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent); - Parent.Children[Edge] = N; - - return N; - } - - /// Allocate an internal node and add it to the tree. - /// - /// \param Parent The parent of this node. Only null when allocating the root. - /// \param StartIdx The start index of this node's associated string. - /// \param EndIdx The end index of this node's associated string. - /// \param Edge The label on the edge leaving \p Parent to this node. - /// - /// \returns A pointer to the allocated internal node. - SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx, - size_t EndIdx, unsigned Edge) { - - assert(StartIdx <= EndIdx && "String can't start after it ends!"); - assert(!(!Parent && StartIdx != EmptyIdx) && - "Non-root internal nodes must have parents!"); - - size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx); - SuffixTreeNode *N = new (NodeAllocator.Allocate()) - SuffixTreeNode(StartIdx, E, Root, Parent); - if (Parent) - Parent->Children[Edge] = N; - - return N; - } - - /// \brief Set the suffix indices of the leaves to the start indices of their - /// respective suffixes. Also stores each leaf in \p LeafVector at its - /// respective suffix index. - /// - /// \param[in] CurrNode The node currently being visited. - /// \param CurrIdx The current index of the string being visited. - void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) { - - bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); - - // Store the length of the concatenation of all strings from the root to - // this node. - if (!CurrNode.isRoot()) { - if (CurrNode.ConcatLen == 0) - CurrNode.ConcatLen = CurrNode.size(); - - if (CurrNode.Parent) - CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; - } - - // Traverse the tree depth-first. - for (auto &ChildPair : CurrNode.Children) { - assert(ChildPair.second && "Node had a null child!"); - setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size()); - } - - // Is this node a leaf? - if (IsLeaf) { - // If yes, give it a suffix index and bump its parent's occurrence count. - CurrNode.SuffixIdx = Str.size() - CurrIdx; - assert(CurrNode.Parent && "CurrNode had no parent!"); - CurrNode.Parent->OccurrenceCount++; - - // Store the leaf in the leaf vector for pruning later. - LeafVector[CurrNode.SuffixIdx] = &CurrNode; - } - } - - /// \brief Construct the suffix tree for the prefix of the input ending at - /// \p EndIdx. - /// - /// Used to construct the full suffix tree iteratively. At the end of each - /// step, the constructed suffix tree is either a valid suffix tree, or a - /// suffix tree with implicit suffixes. At the end of the final step, the - /// suffix tree is a valid tree. - /// - /// \param EndIdx The end index of the current prefix in the main string. - /// \param SuffixesToAdd The number of suffixes that must be added - /// to complete the suffix tree at the current phase. - /// - /// \returns The number of suffixes that have not been added at the end of - /// this step. - unsigned extend(size_t EndIdx, size_t SuffixesToAdd) { - SuffixTreeNode *NeedsLink = nullptr; - - while (SuffixesToAdd > 0) { - - // Are we waiting to add anything other than just the last character? - if (Active.Len == 0) { - // If not, then say the active index is the end index. - Active.Idx = EndIdx; - } - - assert(Active.Idx <= EndIdx && "Start index can't be after end index!"); - - // The first character in the current substring we're looking at. - unsigned FirstChar = Str[Active.Idx]; - - // Have we inserted anything starting with FirstChar at the current node? - if (Active.Node->Children.count(FirstChar) == 0) { - // If not, then we can just insert a leaf and move too the next step. - insertLeaf(*Active.Node, EndIdx, FirstChar); - - // The active node is an internal node, and we visited it, so it must - // need a link if it doesn't have one. - if (NeedsLink) { - NeedsLink->Link = Active.Node; - NeedsLink = nullptr; - } - } else { - // There's a match with FirstChar, so look for the point in the tree to - // insert a new node. - SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - - size_t SubstringLen = NextNode->size(); - - // Is the current suffix we're trying to insert longer than the size of - // the child we want to move to? - if (Active.Len >= SubstringLen) { - // If yes, then consume the characters we've seen and move to the next - // node. - Active.Idx += SubstringLen; - Active.Len -= SubstringLen; - Active.Node = NextNode; - continue; - } - - // Otherwise, the suffix we're trying to insert must be contained in the - // next node we want to move to. - unsigned LastChar = Str[EndIdx]; - - // Is the string we're trying to insert a substring of the next node? - if (Str[NextNode->StartIdx + Active.Len] == LastChar) { - // If yes, then we're done for this step. Remember our insertion point - // and move to the next end index. At this point, we have an implicit - // suffix tree. - if (NeedsLink && !Active.Node->isRoot()) { - NeedsLink->Link = Active.Node; - NeedsLink = nullptr; - } - - Active.Len++; - break; - } - - // The string we're trying to insert isn't a substring of the next node, - // but matches up to a point. Split the node. - // - // For example, say we ended our search at a node n and we're trying to - // insert ABD. Then we'll create a new node s for AB, reduce n to just - // representing C, and insert a new leaf node l to represent d. This - // allows us to ensure that if n was a leaf, it remains a leaf. - // - // | ABC ---split---> | AB - // n s - // C / \ D - // n l - - // The node s from the diagram - SuffixTreeNode *SplitNode = - insertInternalNode(Active.Node, NextNode->StartIdx, - NextNode->StartIdx + Active.Len - 1, FirstChar); - - // Insert the new node representing the new substring into the tree as - // a child of the split node. This is the node l from the diagram. - insertLeaf(*SplitNode, EndIdx, LastChar); - - // Make the old node a child of the split node and update its start - // index. This is the node n from the diagram. - NextNode->StartIdx += Active.Len; - NextNode->Parent = SplitNode; - SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; - - // SplitNode is an internal node, update the suffix link. - if (NeedsLink) - NeedsLink->Link = SplitNode; - - NeedsLink = SplitNode; - } - - // We've added something new to the tree, so there's one less suffix to - // add. - SuffixesToAdd--; - - if (Active.Node->isRoot()) { - if (Active.Len > 0) { - Active.Len--; - Active.Idx = EndIdx - SuffixesToAdd + 1; - } - } else { - // Start the next phase at the next smallest suffix. - Active.Node = Active.Node->Link; - } - } - - return SuffixesToAdd; - } - -public: - /// Construct a suffix tree from a sequence of unsigned integers. - /// - /// \param Str The string to construct the suffix tree for. - SuffixTree(const std::vector &Str) : Str(Str) { - Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); - Root->IsInTree = true; - Active.Node = Root; - LeafVector = std::vector(Str.size()); - - // Keep track of the number of suffixes we have to add of the current - // prefix. - size_t SuffixesToAdd = 0; - Active.Node = Root; - - // Construct the suffix tree iteratively on each prefix of the string. - // PfxEndIdx is the end index of the current prefix. - // End is one past the last element in the string. - for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) { - SuffixesToAdd++; - LeafEndIdx = PfxEndIdx; // Extend each of the leaves. - SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); - } - - // Set the suffix indices of each leaf. - assert(Root && "Root node can't be nullptr!"); - setSuffixIndices(*Root, 0); - } -}; - /// \brief Maps \p MachineInstrs to unsigned integers and stores the mappings. -struct InstructionMapper { - - /// \brief The next available integer to assign to a \p MachineInstr that - /// cannot be outlined. - /// - /// Set to -3 for compatability with \p DenseMapInfo. - unsigned IllegalInstrNumber = -3; - - /// \brief The next available integer to assign to a \p MachineInstr that can - /// be outlined. - unsigned LegalInstrNumber = 0; +struct MachineInstructionMapper : public SequencedOutlinerImpl::OutlinerMapper { + // Get the number of instructions mapped in this module. + virtual unsigned getNumMappedInstructions() const override { + return InstrList.size(); + } /// Correspondence from \p MachineInstrs to unsigned integers. DenseMap InstructionIntegerMap; - /// Corresponcence from unsigned integers to \p MachineInstrs. - /// Inverse of \p InstructionIntegerMap. - DenseMap IntegerInstructionMap; - - /// The vector of unsigned integers that the module is mapped to. - std::vector UnsignedVec; - /// \brief Stores the location of the instruction associated with the integer - /// at index i in \p UnsignedVec for each index i. - std::vector InstrList; + /// at index i in \p CCVec for each index i. + std::vector InstrList; /// \brief Maps \p *It to a legal integer. /// - /// Updates \p InstrList, \p UnsignedVec, \p InstructionIntegerMap, + /// Updates \p InstrList, \p CCVec, \p InstructionIntegerMap, /// \p IntegerInstructionMap, and \p LegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToLegalUnsigned(MachineBasicBlock::iterator &It) { - + void mapToLegalUnsigned(MachineBasicBlock::iterator &It) { // Get the integer for this instruction or give it the current // LegalInstrNumber. - InstrList.push_back(It); MachineInstr &MI = *It; + InstrList.push_back(&MI); + bool WasInserted; DenseMap::iterator ResultIt; std::tie(ResultIt, WasInserted) = - InstructionIntegerMap.insert(std::make_pair(&MI, LegalInstrNumber)); - unsigned MINumber = ResultIt->second; + InstructionIntegerMap.insert(std::make_pair(&MI, CCID)); + CCVec.push_back(ResultIt->second); // There was an insertion. - if (WasInserted) { - LegalInstrNumber++; - IntegerInstructionMap.insert(std::make_pair(MINumber, &MI)); - } - - UnsignedVec.push_back(MINumber); - - // Make sure we don't overflow or use any integers reserved by the DenseMap. - if (LegalInstrNumber >= IllegalInstrNumber) - report_fatal_error("Instruction mapping overflow!"); - - assert(LegalInstrNumber != DenseMapInfo::getEmptyKey() && - "Tried to assign DenseMap tombstone or empty key to instruction."); - assert(LegalInstrNumber != DenseMapInfo::getTombstoneKey() && - "Tried to assign DenseMap tombstone or empty key to instruction."); - - return MINumber; + if (WasInserted) + CCID++; } /// Maps \p *It to an illegal integer. /// - /// Updates \p InstrList, \p UnsignedVec, and \p IllegalInstrNumber. + /// Updates \p InstrList, \p CCVec, and \p IllegalInstrNumber. /// /// \returns The integer that \p *It was mapped to. - unsigned mapToIllegalUnsigned(MachineBasicBlock::iterator &It) { - unsigned MINumber = IllegalInstrNumber; - - InstrList.push_back(It); - UnsignedVec.push_back(IllegalInstrNumber); - IllegalInstrNumber--; - - assert(LegalInstrNumber < IllegalInstrNumber && - "Instruction mapping overflow!"); - - assert(IllegalInstrNumber != DenseMapInfo::getEmptyKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); - - assert(IllegalInstrNumber != DenseMapInfo::getTombstoneKey() && - "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); - - return MINumber; + void mapToIllegalUnsigned(MachineBasicBlock::iterator &It) { + InstrList.push_back(&*It); + CCVec.push_back(IllegalID--); + assert(CCID < IllegalID && "Instruction mapping overflow!"); } /// \brief Transforms a \p MachineBasicBlock into a \p vector of \p unsigneds - /// and appends it to \p UnsignedVec and \p InstrList. + /// and appends it to \p CCVec and \p InstrList. /// /// Two instructions are assigned the same integer if they are identical. /// If an instruction is deemed unsafe to outline, then it will be assigned an @@ -663,7 +127,6 @@ const TargetInstrInfo &TII) { for (MachineBasicBlock::iterator It = MBB.begin(), Et = MBB.end(); It != Et; It++) { - // Keep track of where this instruction is in the module. switch (TII.getOutliningType(*It)) { case TargetInstrInfo::MachineOutlinerInstrType::Illegal: @@ -677,25 +140,77 @@ case TargetInstrInfo::MachineOutlinerInstrType::Invisible: break; } + + // Make sure we don't overflow or use any integers reserved by the DenseMap. + if (CCID >= IllegalID) + report_fatal_error("Instruction mapping overflow!"); } // After we're done every insertion, uniquely terminate this part of the // "string". This makes sure we won't match across basic block or function // boundaries since the "end" is encoded uniquely and thus appears in no // repeated substring. - InstrList.push_back(MBB.end()); - UnsignedVec.push_back(IllegalInstrNumber); - IllegalInstrNumber--; + InstrList.push_back(nullptr); + CCVec.push_back(IllegalID--); } +}; - InstructionMapper() { - // Make sure that the implementation of DenseMapInfo hasn't - // changed. - assert(DenseMapInfo::getEmptyKey() == (unsigned)-1 && - "DenseMapInfo's empty key isn't -1!"); - assert(DenseMapInfo::getTombstoneKey() == (unsigned)-2 && - "DenseMapInfo's tombstone key isn't -2!"); +/// \brief Outliner specialized for sequenced machine instructions. +struct MachineSequenceOutliner : public SequencedOutlinerImpl { + MachineSequenceOutliner( + OutlinerMapper &OM, + function_ref OutlineFn, + const TargetInstrInfo *TII) + : SequencedOutlinerImpl(OM, /*MinInstructionLen*/2, /*MinOccurrences*/2, + SequencedOutlinerImpl::CSM_SuffixTree, OutlineFn), + TII(TII) {} + virtual ~MachineSequenceOutliner() {} + virtual void + analyzeCandidateList(std::vector &CL) override { + for (OutlineCandidate &OC : CL) + std::tie(OC.Benefit, OC.BenefitPerOccur) + = getBenefit(OC.Occurrences, OC.Len); } + + virtual bool prePrune(ArrayRef Idxs, + unsigned Size) override { + return getBenefit(Idxs, Size).first == 0; + }; + + std::pair getBenefit(ArrayRef Idxs, unsigned Size) { + MachineInstructionMapper &MIM = + getMapperAs(); + + unsigned CallOverhead = 0; + unsigned FrameOverhead = 0; + unsigned SequenceOverhead = Size; + size_t Occurrences = Idxs.size(); + + for(unsigned Idx : Idxs) { + MachineBasicBlock::iterator StartIt(MIM.InstrList[Idx]); + MachineBasicBlock::iterator EndIt(MIM.InstrList[Idx + Size - 1]); + CallOverhead += TII->getOutliningCallOverhead(StartIt, EndIt); + } + + // Figure out how many instructions it'll take to construct an outlined + // function frame for this sequence. + unsigned FirstOccur = Idxs.front(); + MachineBasicBlock::iterator StartIt = MIM.InstrList[FirstOccur]; + MachineBasicBlock::iterator EndIt = + MIM.InstrList[FirstOccur + Size - 1]; + FrameOverhead = TII->getOutliningFrameOverhead(StartIt, EndIt); + + unsigned OccurCallCost = TII->getOutliningCallOverhead(StartIt, EndIt); + unsigned OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; + unsigned NotOutliningCost = SequenceOverhead * Occurrences; + + if (NotOutliningCost <= OutliningCost) + return std::make_pair(0u, 0u); + unsigned Benefit = NotOutliningCost - OutliningCost; + unsigned BenefitPerOccur = SequenceOverhead - OccurCallCost; + return std::make_pair(Benefit, BenefitPerOccur); + } + const TargetInstrInfo *TII; }; /// \brief An interprocedural pass which finds repeated sequences of @@ -708,96 +223,23 @@ /// \p MachineFunction and each instance is then replaced with a call to that /// function. struct MachineOutliner : public ModulePass { - static char ID; - StringRef getPassName() const override { return "Machine Outliner"; } - void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addPreserved(); AU.setPreservesAll(); ModulePass::getAnalysisUsage(AU); } - MachineOutliner() : ModulePass(ID) { initializeMachineOutlinerPass(*PassRegistry::getPassRegistry()); } - /// Find all repeated substrings that satisfy the outlining cost model. - /// - /// If a substring appears at least twice, then it must be represented by - /// an internal node which appears in at least two suffixes. Each suffix is - /// represented by a leaf node. To do this, we visit each internal node in - /// the tree, using the leaf children of each internal node. If an internal - /// node represents a beneficial substring, then we use each of its leaf - /// children to find the locations of its substring. - /// - /// \param ST A suffix tree to query. - /// \param TII TargetInstrInfo for the target. - /// \param Mapper Contains outlining mapping information. - /// \param[out] CandidateList Filled with candidates representing each - /// beneficial substring. - /// \param[out] FunctionList Filled with a list of \p OutlinedFunctions each - /// type of candidate. - /// - /// \returns The length of the longest candidate found. - size_t findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, - InstructionMapper &Mapper, - std::vector &CandidateList, - std::vector &FunctionList); - - /// \brief Replace the sequences of instructions represented by the - /// \p Candidates in \p CandidateList with calls to \p MachineFunctions - /// described in \p FunctionList. - /// - /// \param M The module we are outlining from. - /// \param CandidateList A list of candidates to be outlined. - /// \param FunctionList A list of functions to be inserted into the module. - /// \param Mapper Contains the instruction mappings for the module. - bool outline(Module &M, const ArrayRef &CandidateList, - std::vector &FunctionList, - InstructionMapper &Mapper); - /// Creates a function for \p OF and inserts it into the module. - MachineFunction *createOutlinedFunction(Module &M, const OutlinedFunction &OF, - InstructionMapper &Mapper); - - /// Find potential outlining candidates and store them in \p CandidateList. - /// - /// For each type of potential candidate, also build an \p OutlinedFunction - /// struct containing the information to build the function for that - /// candidate. - /// - /// \param[out] CandidateList Filled with outlining candidates for the module. - /// \param[out] FunctionList Filled with functions corresponding to each type - /// of \p Candidate. - /// \param ST The suffix tree for the module. - /// \param TII TargetInstrInfo for the module. - /// - /// \returns The length of the longest candidate found. 0 if there are none. - unsigned buildCandidateList(std::vector &CandidateList, - std::vector &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper, - const TargetInstrInfo &TII); - - /// \brief Remove any overlapping candidates that weren't handled by the - /// suffix tree's pruning method. - /// - /// Pruning from the suffix tree doesn't necessarily remove all overlaps. - /// If a short candidate is chosen for outlining, then a longer candidate - /// which has that short candidate as a suffix is chosen, the tree's pruning - /// method will not find it. Thus, we need to prune before outlining as well. - /// - /// \param[in,out] CandidateList A list of outlining candidates. - /// \param[in,out] FunctionList A list of functions to be outlined. - /// \param Mapper Contains instruction mapping info for outlining. - /// \param MaxCandidateLen The length of the longest candidate. - /// \param TII TargetInstrInfo for the module. - void pruneOverlaps(std::vector &CandidateList, - std::vector &FunctionList, - InstructionMapper &Mapper, unsigned MaxCandidateLen, - const TargetInstrInfo &TII); + MachineFunction *createOutlinedFunction(Module &M, const OutlineCandidate &OF, + MachineInstructionMapper &Mapper, + size_t CandNum, bool IsTailCall, + unsigned FirstOccur); /// Construct a suffix tree on the instructions in \p M and outline repeated /// strings from that tree. @@ -807,276 +249,19 @@ } // Anonymous namespace. char MachineOutliner::ID = 0; - -namespace llvm { -ModulePass *createMachineOutlinerPass() { return new MachineOutliner(); } -} // namespace llvm - +ModulePass *llvm::createMachineOutlinerPass() { return new MachineOutliner(); } INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) -size_t -MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, - InstructionMapper &Mapper, - std::vector &CandidateList, - std::vector &FunctionList) { - - CandidateList.clear(); - FunctionList.clear(); - size_t FnIdx = 0; - size_t MaxLen = 0; - - // FIXME: Visit internal nodes instead of leaves. - for (SuffixTreeNode *Leaf : ST.LeafVector) { - assert(Leaf && "Leaves in LeafVector cannot be null!"); - if (!Leaf->IsInTree) - continue; - - assert(Leaf->Parent && "All leaves must have parents!"); - SuffixTreeNode &Parent = *(Leaf->Parent); - - // If it doesn't appear enough, or we already outlined from it, skip it. - if (Parent.OccurrenceCount < 2 || Parent.isRoot() || !Parent.IsInTree) - continue; - - // Figure out if this candidate is beneficial. - size_t StringLen = Leaf->ConcatLen - Leaf->size(); - size_t CallOverhead = 0; - size_t FrameOverhead = 0; - size_t SequenceOverhead = StringLen; - - // Figure out the call overhead for each instance of the sequence. - for (auto &ChildPair : Parent.Children) { - SuffixTreeNode *M = ChildPair.second; - - if (M && M->IsInTree && M->isLeaf()) { - // Each sequence is over [StartIt, EndIt]. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[M->SuffixIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[M->SuffixIdx + StringLen - 1]; - CallOverhead += TII.getOutliningCallOverhead(StartIt, EndIt); - } - } - - // Figure out how many instructions it'll take to construct an outlined - // function frame for this sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[Leaf->SuffixIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[Leaf->SuffixIdx + StringLen - 1]; - FrameOverhead = TII.getOutliningFrameOverhead(StartIt, EndIt); - - size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; - size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; - - if (NotOutliningCost <= OutliningCost) - continue; - - size_t Benefit = NotOutliningCost - OutliningCost; - - if (StringLen > MaxLen) - MaxLen = StringLen; - - unsigned OccurrenceCount = 0; - for (auto &ChildPair : Parent.Children) { - SuffixTreeNode *M = ChildPair.second; - - // Is it a leaf? If so, we have an occurrence of this candidate. - if (M && M->IsInTree && M->isLeaf()) { - OccurrenceCount++; - CandidateList.emplace_back(M->SuffixIdx, StringLen, FnIdx); - CandidateList.back().Benefit = Benefit; - M->IsInTree = false; - } - } - - // Save the function for the new candidate sequence. - std::vector CandidateSequence; - for (unsigned i = Leaf->SuffixIdx; i < Leaf->SuffixIdx + StringLen; i++) - CandidateSequence.push_back(ST.Str[i]); - - FunctionList.emplace_back(FnIdx, OccurrenceCount, CandidateSequence, - Benefit, false); - - // Move to the next function. - FnIdx++; - Parent.IsInTree = false; - } - - return MaxLen; -} - -void MachineOutliner::pruneOverlaps(std::vector &CandidateList, - std::vector &FunctionList, - InstructionMapper &Mapper, - unsigned MaxCandidateLen, - const TargetInstrInfo &TII) { - // TODO: Experiment with interval trees or other interval-checking structures - // to lower the time complexity of this function. - // TODO: Can we do better than the simple greedy choice? - // Check for overlaps in the range. - // This is O(MaxCandidateLen * CandidateList.size()). - for (auto It = CandidateList.begin(), Et = CandidateList.end(); It != Et; - It++) { - Candidate &C1 = *It; - OutlinedFunction &F1 = FunctionList[C1.FunctionIdx]; - - // If we removed this candidate, skip it. - if (!C1.InCandidateList) - continue; - - // Is it still worth it to outline C1? - if (F1.Benefit < 1 || F1.OccurrenceCount < 2) { - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - C1.InCandidateList = false; - continue; - } - - // The minimum start index of any candidate that could overlap with this - // one. - unsigned FarthestPossibleIdx = 0; - - // Either the index is 0, or it's at most MaxCandidateLen indices away. - if (C1.StartIdx > MaxCandidateLen) - FarthestPossibleIdx = C1.StartIdx - MaxCandidateLen; - - // Compare against the candidates in the list that start at at most - // FarthestPossibleIdx indices away from C1. There are at most - // MaxCandidateLen of these. - for (auto Sit = It + 1; Sit != Et; Sit++) { - Candidate &C2 = *Sit; - OutlinedFunction &F2 = FunctionList[C2.FunctionIdx]; - - // Is this candidate too far away to overlap? - if (C2.StartIdx < FarthestPossibleIdx) - break; - - // Did we already remove this candidate in a previous step? - if (!C2.InCandidateList) - continue; - - // Is the function beneficial to outline? - if (F2.OccurrenceCount < 2 || F2.Benefit < 1) { - // If not, remove this candidate and move to the next one. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - C2.InCandidateList = false; - continue; - } - - size_t C2End = C2.StartIdx + C2.Len - 1; - - // Do C1 and C2 overlap? - // - // Not overlapping: - // High indices... [C1End ... C1Start][C2End ... C2Start] ...Low indices - // - // We sorted our candidate list so C2Start <= C1Start. We know that - // C2End > C2Start since each candidate has length >= 2. Therefore, all we - // have to check is C2End < C2Start to see if we overlap. - if (C2End < C1.StartIdx) - continue; - - // C1 and C2 overlap. - // We need to choose the better of the two. - // - // Approximate this by picking the one which would have saved us the - // most instructions before any pruning. - if (C1.Benefit >= C2.Benefit) { - - // C1 is better, so remove C2 and update C2's OutlinedFunction to - // reflect the removal. - assert(F2.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F2.OccurrenceCount--; - - // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C2.StartIdx + C2.Len - 1]; - F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); - // Add back one instance of the sequence. - - if (F2.Sequence.size() > F2.Benefit) - F2.Benefit = 0; - else - F2.Benefit -= F2.Sequence.size(); - - C2.InCandidateList = false; - - DEBUG(dbgs() << "- Removed C2. \n"; - dbgs() << "--- Num fns left for C2: " << F2.OccurrenceCount - << "\n"; - dbgs() << "--- C2's benefit: " << F2.Benefit << "\n";); - - } else { - // C2 is better, so remove C1 and update C1's OutlinedFunction to - // reflect the removal. - assert(F1.OccurrenceCount > 0 && - "Can't remove OutlinedFunction with no occurrences!"); - F1.OccurrenceCount--; - - // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C1.StartIdx + C1.Len - 1]; - F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt); - - // Add back one instance of the sequence. - if (F1.Sequence.size() > F1.Benefit) - F1.Benefit = 0; - else - F1.Benefit -= F1.Sequence.size(); - - C1.InCandidateList = false; - - DEBUG(dbgs() << "- Removed C1. \n"; - dbgs() << "--- Num fns left for C1: " << F1.OccurrenceCount - << "\n"; - dbgs() << "--- C1's benefit: " << F1.Benefit << "\n";); - - // C1 is out, so we don't have to compare it against anyone else. - break; - } - } - } -} - -unsigned -MachineOutliner::buildCandidateList(std::vector &CandidateList, - std::vector &FunctionList, - SuffixTree &ST, InstructionMapper &Mapper, - const TargetInstrInfo &TII) { - - std::vector CandidateSequence; // Current outlining candidate. - size_t MaxCandidateLen = 0; // Length of the longest candidate. - - MaxCandidateLen = - findCandidates(ST, TII, Mapper, CandidateList, FunctionList); - - for (auto &OF : FunctionList) - OF.IsTailCall = - Mapper.IntegerInstructionMap[OF.Sequence.back()]->isTerminator(); - - // Sort the candidates in decending order. This will simplify the outlining - // process when we have to remove the candidates from the mapping by - // allowing us to cut them out without keeping track of an offset. - std::stable_sort(CandidateList.begin(), CandidateList.end()); - - return MaxCandidateLen; -} - -MachineFunction * -MachineOutliner::createOutlinedFunction(Module &M, const OutlinedFunction &OF, - InstructionMapper &Mapper) { +MachineFunction *MachineOutliner::createOutlinedFunction( + Module &M, const OutlineCandidate &OF, MachineInstructionMapper &OM, + size_t CandNum, bool IsTailCall, unsigned FirstOccur) { // Create the function name. This should be unique. For now, just hash the // module name and include it in the function name plus the number of this // function. std::ostringstream NameStream; - NameStream << "OUTLINED_FUNCTION_" << OF.Name; + NameStream << "OUTLINED_FUNCTION_" << CandNum; // Create the function using an IR-level function. LLVMContext &C = M.getContext(); @@ -1101,14 +286,13 @@ // Insert the new function into the module. MF.insert(MF.begin(), &MBB); - - TII.insertOutlinerPrologue(MBB, MF, OF.IsTailCall); + TII.insertOutlinerPrologue(MBB, MF, IsTailCall); // Copy over the instructions for the function using the integer mappings in // its sequence. - for (unsigned Str : OF.Sequence) { - MachineInstr *NewMI = - MF.CloneMachineInstr(Mapper.IntegerInstructionMap.find(Str)->second); + for (unsigned InstrIdx = FirstOccur, InstrE = FirstOccur + OF.Len; + InstrIdx < InstrE; ++InstrIdx) { + MachineInstr *NewMI = MF.CloneMachineInstr(OM.InstrList[InstrIdx]); NewMI->dropMemRefs(); // Don't keep debug information for outlined instructions. @@ -1117,72 +301,11 @@ MBB.insert(MBB.end(), NewMI); } - TII.insertOutlinerEpilogue(MBB, MF, OF.IsTailCall); - + TII.insertOutlinerEpilogue(MBB, MF, IsTailCall); return &MF; } -bool MachineOutliner::outline(Module &M, - const ArrayRef &CandidateList, - std::vector &FunctionList, - InstructionMapper &Mapper) { - - bool OutlinedSomething = false; - - // Replace the candidates with calls to their respective outlined functions. - for (const Candidate &C : CandidateList) { - - // Was the candidate removed during pruneOverlaps? - if (!C.InCandidateList) - continue; - - // If not, then look at its OutlinedFunction. - OutlinedFunction &OF = FunctionList[C.FunctionIdx]; - - // Was its OutlinedFunction made unbeneficial during pruneOverlaps? - if (OF.OccurrenceCount < 2 || OF.Benefit < 1) - continue; - - // If not, then outline it. - assert(C.StartIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); - MachineBasicBlock *MBB = (*Mapper.InstrList[C.StartIdx]).getParent(); - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C.StartIdx]; - unsigned EndIdx = C.StartIdx + C.Len - 1; - - assert(EndIdx < Mapper.InstrList.size() && "Candidate out of bounds!"); - MachineBasicBlock::iterator EndIt = Mapper.InstrList[EndIdx]; - assert(EndIt != MBB->end() && "EndIt out of bounds!"); - - EndIt++; // Erase needs one past the end index. - - // Does this candidate have a function yet? - if (!OF.MF) { - OF.MF = createOutlinedFunction(M, OF, Mapper); - FunctionsCreated++; - } - - MachineFunction *MF = OF.MF; - const TargetSubtargetInfo &STI = MF->getSubtarget(); - const TargetInstrInfo &TII = *STI.getInstrInfo(); - - // Insert a call to the new function and erase the old sequence. - TII.insertOutlinedCall(M, *MBB, StartIt, *MF, OF.IsTailCall); - StartIt = Mapper.InstrList[C.StartIdx]; - MBB->erase(StartIt, EndIt); - - OutlinedSomething = true; - - // Statistics. - NumOutlined++; - } - - DEBUG(dbgs() << "OutlinedSomething = " << OutlinedSomething << "\n";); - - return OutlinedSomething; -} - bool MachineOutliner::runOnModule(Module &M) { - // Is there anything in the module at all? if (M.empty()) return false; @@ -1193,7 +316,7 @@ const TargetRegisterInfo *TRI = STI.getRegisterInfo(); const TargetInstrInfo *TII = STI.getInstrInfo(); - InstructionMapper Mapper; + MachineInstructionMapper OM; // Build instruction mappings for each function in the module. for (Function &F : M) { @@ -1204,29 +327,44 @@ continue; // If it is, look at each MachineBasicBlock in the function. - for (MachineBasicBlock &MBB : MF) { - - // Is there anything in MBB? - if (MBB.empty()) - continue; - - // If yes, map it. - Mapper.convertToUnsignedVec(MBB, *TRI, *TII); - } + for (MachineBasicBlock &MBB : MF) + if (!MBB.empty()) + OM.convertToUnsignedVec(MBB, *TRI, *TII); } - // Construct a suffix tree, use it to find candidates, and then outline them. - SuffixTree ST(Mapper.UnsignedVec); - std::vector CandidateList; - std::vector FunctionList; - - // Find all of the outlining candidates. - unsigned MaxCandidateLen = - buildCandidateList(CandidateList, FunctionList, ST, Mapper, *TII); - - // Remove candidates that overlap with other candidates. - pruneOverlaps(CandidateList, FunctionList, Mapper, MaxCandidateLen, *TII); + // Functor to be used when outlining a new candidate. + std::function + OutlineFn = [&](OutlineCandidate &Cand, size_t CandNum) { + unsigned FirstOccur = Cand.getOccurrence(0); + bool IsTailCall = + OM.InstrList[FirstOccur + Cand.Len - 1]->isTerminator(); + MachineFunction *MF = createOutlinedFunction(M, Cand, OM, CandNum, + IsTailCall, FirstOccur); + const TargetSubtargetInfo &STI = MF->getSubtarget(); + const TargetInstrInfo &TII = *STI.getInstrInfo(); + + FunctionsCreated++; + for (unsigned Occur : Cand) { + MachineBasicBlock *MBB = OM.InstrList[Occur]->getParent(); + MachineBasicBlock::iterator StartIt(OM.InstrList[Occur]); + unsigned EndIdx = Occur + Cand.Len - 1; + + assert(EndIdx < OM.InstrList.size() && "Candidate out of bounds!"); + MachineBasicBlock::iterator EndIt = + OM.InstrList[EndIdx]->getIterator(); + assert(EndIt != MBB->end() && "EndIt out of bounds!"); + EndIt++; // Erase needs one past the end index. + + // Insert a call to the new function and erase the old sequence. + TII.insertOutlinedCall(M, *MBB, StartIt, *MF, IsTailCall); + StartIt = OM.InstrList[Occur]->getIterator(); + MBB->erase(StartIt, EndIt); + + // Statistics. + NumOutlined++; + } + }; - // Outline each of the candidates and return true if something was outlined. - return outline(M, CandidateList, FunctionList, Mapper); + MachineSequenceOutliner MSM(OM, OutlineFn, TII); + return MSM.run(); } Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -38,6 +38,7 @@ ModuleUtils.cpp NameAnonGlobals.cpp OrderedInstructions.cpp + Outliner.cpp PredicateInfo.cpp PromoteMemoryToRegister.cpp StripGCRelocates.cpp Index: lib/Transforms/Utils/Outliner.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/Outliner.cpp @@ -0,0 +1,913 @@ +//===- Outliner.cpp - Generic outliner interface -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements an interface for outlining congruent sequences. +// +//===----------------------------------------------------------------------===// + +#include +#include "llvm/Transforms/Utils/Outliner.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/PriorityQueue.h" + +using namespace llvm; + +bool OutlinerImpl::run() { + std::vector CandidateList; + // 1) Find candidates. + findOutliningOccurrences(CandidateList); + if (CandidateList.empty()) + return false; + // 2) Analyze for benefit. + analyzeCandidateList(CandidateList); + // 3) Prune and Outline. + return pruneAndOutline(CandidateList); +} + +// Suffix tree implementation. +namespace { + +/// Represents an undefined index in the suffix tree. +const size_t EmptyIdx = -1; + +/// A node in a suffix tree which represents a substring or suffix. +/// +/// Each node has either no children or at least two children, with the root +/// being a exception in the empty tree. +/// +/// Children are represented as a map between unsigned integers and nodes. If +/// a node N has a child M on unsigned integer k, then the mapping represented +/// by N is a proper prefix of the mapping represented by M. Note that this, +/// although similar to a trie is somewhat different: each node stores a full +/// substring of the full mapping rather than a single character state. +/// +/// Each internal node contains a pointer to the internal node representing +/// the same string, but with the first character chopped off. This is stored +/// in \p Link. Each leaf node stores the start index of its respective +/// suffix in \p SuffixIdx. +struct SuffixTreeNode { + + /// The children of this node. + /// + /// A child existing on an unsigned integer implies that from the mapping + /// represented by the current node, there is a way to reach another + /// mapping by tacking that character on the end of the current string. + DenseMap Children; + + /// A flag set to false if the node has been pruned from the tree. + bool IsInTree = true; + + /// The start index of this node's substring in the main string. + size_t StartIdx = EmptyIdx; + + /// The end index of this node's substring in the main string. + /// + /// Every leaf node must have its \p EndIdx incremented at the end of every + /// step in the construction algorithm. To avoid having to update O(N) + /// nodes individually at the end of every step, the end index is stored + /// as a pointer. + size_t *EndIdx = nullptr; + + /// For leaves, the start index of the suffix represented by this node. + /// + /// For all other nodes, this is ignored. + size_t SuffixIdx = EmptyIdx; + + /// \brief For internal nodes, a pointer to the internal node representing + /// the same sequence with the first character chopped off. + /// + /// This acts as a shortcut in Ukkonen's algorithm. One of the things that + /// Ukkonen's algorithm does to achieve linear-time construction is + /// keep track of which node the next insert should be at. This makes each + /// insert O(1), and there are a total of O(N) inserts. The suffix link + /// helps with inserting children of internal nodes. + /// + /// Say we add a child to an internal node with associated mapping S. The + /// next insertion must be at the node representing S - its first character. + /// This is given by the way that we iteratively build the tree in Ukkonen's + /// algorithm. The main idea is to look at the suffixes of each prefix in the + /// string, starting with the longest suffix of the prefix, and ending with + /// the shortest. Therefore, if we keep pointers between such nodes, we can + /// move to the next insertion point in O(1) time. If we don't, then we'd + /// have to query from the root, which takes O(N) time. This would make the + /// construction algorithm O(N^2) rather than O(N). + SuffixTreeNode *Link = nullptr; + + /// The parent of this node. Every node except for the root has a parent. + SuffixTreeNode *Parent = nullptr; + + /// The number of times this node's string appears in the tree. + /// + /// This is equal to the number of leaf children of the string. It represents + /// the number of suffixes that the node's string is a prefix of. + size_t OccurrenceCount = 0; + + /// The length of the string formed by concatenating the edge labels from the + /// root to this node. + size_t ConcatLen = 0; + + /// Returns true if this node is a leaf. + bool isLeaf() const { return SuffixIdx != EmptyIdx; } + + /// Returns true if this node is the root of its owning \p SuffixTree. + bool isRoot() const { return StartIdx == EmptyIdx; } + + /// Return the number of elements in the substring associated with this node. + size_t size() const { + + // Is it the root? If so, it's the empty string so return 0. + if (isRoot()) + return 0; + + assert(*EndIdx != EmptyIdx && "EndIdx is undefined!"); + + // Size = the number of elements in the string. + // For example, [0 1 2 3] has length 4, not 3. 3-0 = 3, so we have 3-0+1. + return *EndIdx - StartIdx + 1; + } + + SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link, + SuffixTreeNode *Parent) + : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} + + SuffixTreeNode() {} +}; + +/// A data structure for fast substring queries. +/// +/// Suffix trees represent the suffixes of their input strings in their leaves. +/// A suffix tree is a type of compressed trie structure where each node +/// represents an entire substring rather than a single character. Each leaf +/// of the tree is a suffix. +/// +/// A suffix tree can be seen as a type of state machine where each state is a +/// substring of the full string. The tree is structured so that, for a string +/// of length N, there are exactly N leaves in the tree. This structure allows +/// us to quickly find repeated substrings of the input string. +/// +/// In this implementation, a "string" is a vector of unsigned integers. +/// These integers may result from hashing some data type. A suffix tree can +/// contain 1 or many strings, which can then be queried as one large string. +/// +/// The suffix tree is implemented using Ukkonen's algorithm for linear-time +/// suffix tree construction. Ukkonen's algorithm is explained in more detail +/// in the paper by Esko Ukkonen "On-line construction of suffix trees. The +/// paper is available at +/// +/// https://www.cs.helsinki.fi/u/ukkonen/SuffixT1withFigs.pdf +class SuffixTree { +public: + /// Stores each leaf node in the tree. + /// + /// This is used for finding outlining candidates. + std::vector LeafVector; + + /// Each element is an integer representing an instruction in the module. + ArrayRef Str; + +private: + /// Maintains each node in the tree. + SpecificBumpPtrAllocator NodeAllocator; + + /// The root of the suffix tree. + /// + /// The root represents the empty string. It is maintained by the + /// \p NodeAllocator like every other node in the tree. + SuffixTreeNode *Root = nullptr; + + /// Maintains the end indices of the internal nodes in the tree. + /// + /// Each internal node is guaranteed to never have its end index change + /// during the construction algorithm; however, leaves must be updated at + /// every step. Therefore, we need to store leaf end indices by reference + /// to avoid updating O(N) leaves at every step of construction. Thus, + /// every internal node must be allocated its own end index. + BumpPtrAllocator InternalEndIdxAllocator; + + /// The end index of each leaf in the tree. + size_t LeafEndIdx = -1; + + /// \brief Helper struct which keeps track of the next insertion point in + /// Ukkonen's algorithm. + struct ActiveState { + /// The next node to insert at. + SuffixTreeNode *Node; + + /// The index of the first character in the substring currently being added. + size_t Idx = EmptyIdx; + + /// The length of the substring we have to add at the current step. + size_t Len = 0; + }; + + /// \brief The point the next insertion will take place at in the + /// construction algorithm. + ActiveState Active; + + /// Allocate a leaf node and add it to the tree. + /// + /// \param Parent The parent of this node. + /// \param StartIdx The start index of this node's associated string. + /// \param Edge The label on the edge leaving \p Parent to this node. + /// + /// \returns A pointer to the allocated leaf node. + SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx, + unsigned Edge) { + + assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); + + SuffixTreeNode *N = new (NodeAllocator.Allocate()) + SuffixTreeNode(StartIdx, &LeafEndIdx, nullptr, &Parent); + Parent.Children[Edge] = N; + + return N; + } + + /// Allocate an internal node and add it to the tree. + /// + /// \param Parent The parent of this node. Only null when allocating the root. + /// \param StartIdx The start index of this node's associated string. + /// \param EndIdx The end index of this node's associated string. + /// \param Edge The label on the edge leaving \p Parent to this node. + /// + /// \returns A pointer to the allocated internal node. + SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx, + size_t EndIdx, unsigned Edge) { + + assert(StartIdx <= EndIdx && "String can't start after it ends!"); + assert(!(!Parent && StartIdx != EmptyIdx) && + "Non-root internal nodes must have parents!"); + + size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx); + SuffixTreeNode *N = new (NodeAllocator.Allocate()) + SuffixTreeNode(StartIdx, E, Root, Parent); + if (Parent) + Parent->Children[Edge] = N; + + return N; + } + + /// \brief Set the suffix indices of the leaves to the start indices of their + /// respective suffixes. Also stores each leaf in \p LeafVector at its + /// respective suffix index. + /// + /// \param[in] CurrNode The node currently being visited. + /// \param CurrIdx The current index of the string being visited. + void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) { + + bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); + + // Store the length of the concatenation of all strings from the root to + // this node. + if (!CurrNode.isRoot()) { + if (CurrNode.ConcatLen == 0) + CurrNode.ConcatLen = CurrNode.size(); + + if (CurrNode.Parent) + CurrNode.ConcatLen += CurrNode.Parent->ConcatLen; + } + + // Traverse the tree depth-first. + for (auto &ChildPair : CurrNode.Children) { + assert(ChildPair.second && "Node had a null child!"); + setSuffixIndices(*ChildPair.second, CurrIdx + ChildPair.second->size()); + } + + // Is this node a leaf? + if (IsLeaf) { + // If yes, give it a suffix index and bump its parent's occurrence count. + CurrNode.SuffixIdx = Str.size() - CurrIdx; + assert(CurrNode.Parent && "CurrNode had no parent!"); + CurrNode.Parent->OccurrenceCount++; + + // Store the leaf in the leaf vector for pruning later. + LeafVector[CurrNode.SuffixIdx] = &CurrNode; + } + } + + /// \brief Construct the suffix tree for the prefix of the input ending at + /// \p EndIdx. + /// + /// Used to construct the full suffix tree iteratively. At the end of each + /// step, the constructed suffix tree is either a valid suffix tree, or a + /// suffix tree with implicit suffixes. At the end of the final step, the + /// suffix tree is a valid tree. + /// + /// \param EndIdx The end index of the current prefix in the main string. + /// \param SuffixesToAdd The number of suffixes that must be added + /// to complete the suffix tree at the current phase. + /// + /// \returns The number of suffixes that have not been added at the end of + /// this step. + unsigned extend(size_t EndIdx, size_t SuffixesToAdd) { + SuffixTreeNode *NeedsLink = nullptr; + + while (SuffixesToAdd > 0) { + + // Are we waiting to add anything other than just the last character? + if (Active.Len == 0) { + // If not, then say the active index is the end index. + Active.Idx = EndIdx; + } + + assert(Active.Idx <= EndIdx && "Start index can't be after end index!"); + + // The first character in the current substring we're looking at. + unsigned FirstChar = Str[Active.Idx]; + + // Have we inserted anything starting with FirstChar at the current node? + if (Active.Node->Children.count(FirstChar) == 0) { + // If not, then we can just insert a leaf and move too the next step. + insertLeaf(*Active.Node, EndIdx, FirstChar); + + // The active node is an internal node, and we visited it, so it must + // need a link if it doesn't have one. + if (NeedsLink) { + NeedsLink->Link = Active.Node; + NeedsLink = nullptr; + } + } else { + // There's a match with FirstChar, so look for the point in the tree to + // insert a new node. + SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; + + size_t SubstringLen = NextNode->size(); + + // Is the current suffix we're trying to insert longer than the size of + // the child we want to move to? + if (Active.Len >= SubstringLen) { + // If yes, then consume the characters we've seen and move to the next + // node. + Active.Idx += SubstringLen; + Active.Len -= SubstringLen; + Active.Node = NextNode; + continue; + } + + // Otherwise, the suffix we're trying to insert must be contained in the + // next node we want to move to. + unsigned LastChar = Str[EndIdx]; + + // Is the string we're trying to insert a substring of the next node? + if (Str[NextNode->StartIdx + Active.Len] == LastChar) { + // If yes, then we're done for this step. Remember our insertion point + // and move to the next end index. At this point, we have an implicit + // suffix tree. + if (NeedsLink && !Active.Node->isRoot()) { + NeedsLink->Link = Active.Node; + NeedsLink = nullptr; + } + + Active.Len++; + break; + } + + // The string we're trying to insert isn't a substring of the next node, + // but matches up to a point. Split the node. + // + // For example, say we ended our search at a node n and we're trying to + // insert ABD. Then we'll create a new node s for AB, reduce n to just + // representing C, and insert a new leaf node l to represent d. This + // allows us to ensure that if n was a leaf, it remains a leaf. + // + // | ABC ---split---> | AB + // n s + // C / \ D + // n l + + // The node s from the diagram + SuffixTreeNode *SplitNode = + insertInternalNode(Active.Node, NextNode->StartIdx, + NextNode->StartIdx + Active.Len - 1, FirstChar); + + // Insert the new node representing the new substring into the tree as + // a child of the split node. This is the node l from the diagram. + insertLeaf(*SplitNode, EndIdx, LastChar); + + // Make the old node a child of the split node and update its start + // index. This is the node n from the diagram. + NextNode->StartIdx += Active.Len; + NextNode->Parent = SplitNode; + SplitNode->Children[Str[NextNode->StartIdx]] = NextNode; + + // SplitNode is an internal node, update the suffix link. + if (NeedsLink) + NeedsLink->Link = SplitNode; + + NeedsLink = SplitNode; + } + + // We've added something new to the tree, so there's one less suffix to + // add. + SuffixesToAdd--; + + if (Active.Node->isRoot()) { + if (Active.Len > 0) { + Active.Len--; + Active.Idx = EndIdx - SuffixesToAdd + 1; + } + } else { + // Start the next phase at the next smallest suffix. + Active.Node = Active.Node->Link; + } + } + + return SuffixesToAdd; + } + +public: + /// Construct a suffix tree from a sequence of unsigned integers. + /// + /// \param Str The string to construct the suffix tree for. + SuffixTree(ArrayRef Str) : Str(Str) { + Root = insertInternalNode(nullptr, EmptyIdx, EmptyIdx, 0); + Root->IsInTree = true; + Active.Node = Root; + LeafVector = std::vector(Str.size()); + + // Keep track of the number of suffixes we have to add of the current + // prefix. + size_t SuffixesToAdd = 0; + Active.Node = Root; + + // Construct the suffix tree iteratively on each prefix of the string. + // PfxEndIdx is the end index of the current prefix. + // End is one past the last element in the string. + for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) { + SuffixesToAdd++; + LeafEndIdx = PfxEndIdx; // Extend each of the leaves. + SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); + } + + // Set the suffix indices of each leaf. + assert(Root && "Root node can't be nullptr!"); + setSuffixIndices(*Root, 0); + } +}; +} + +// Suffix array implementation. +namespace { +/// \brief Compute the suffix array. +// Basic adapted implementation of SA-IS algorithm. +class SuffixArray { +public: + // Compute the suffix array of /p S with given alphabet size /p AlphabetSize + // and store the result in /p SA + static void compute(ArrayRef S, std::vector &SA, + unsigned AlphabetSize) { + SuffixArray SACtr(S.size(), SA); + SACtr.computeSAIS(S, S.size(), AlphabetSize); + } + +private: + SuffixArray(size_t ArraySize, std::vector &SA) : SA(SA) { + SA.resize(ArraySize); + } + + template + void computeSAIS(ArrayRef S, int N, unsigned AlphabetSize) { + // Bitvector for LS-type array. + BitVector LTypeArray(N); + + // Classify each character from S as either LType or SType. + LTypeArray.set(N - 1); + for (int i = N - 3, e = 0; i >= e; --i) { + // S(i) is type S iff: S(i) < S(i+1) or S(i)==S(i+1) and S(i+1) is type + // S + if (S[i] < S[i + 1] || (S[i] == S[i + 1] && LTypeArray.test(i + 1))) + LTypeArray.set(i); + } + + // Stage 1: Reduce the problem and bucket sort all S-substrings. + Bucket.resize(AlphabetSize + 1); + /// Get the bucket ends. + getBuckets(S, true, N, AlphabetSize); + for (int i = 0; i < N; ++i) + SA[i] = -1; + for (int i = 1; i < N; ++i) + if (isLMS(i, LTypeArray)) + SA[--Bucket[S[i]]] = i; + induceSA(S, LTypeArray, N, AlphabetSize); + Bucket.clear(); + + /// Compact the sorted substrings into the first N1 items of the suffix + /// array. + int N1 = 0; + for (int i = 0; i < N; ++i) + if (isLMS(SA[i], LTypeArray)) + SA[N1++] = SA[i]; + + /// Find the lexicographic names of the substrings. + for (int i = N1; i < N; ++i) + SA[i] = -1; + int Name = 0, Prev = -1; + for (int i = 0; i < N1; ++i) { + int Pos = SA[i]; + for (int d = 0; d < N; ++d) { + if (Prev == -1 || S[Pos + d] != S[Prev + d] || + LTypeArray.test(Pos + d) != LTypeArray.test(Prev + d)) { + ++Name; + Prev = Pos; + break; + } + if (d > 0 && + (isLMS(Pos + d, LTypeArray) || isLMS(Prev + d, LTypeArray))) + break; + } + Pos = (Pos % 2 == 0) ? Pos / 2 : (Pos - 1) / 2; + SA[N1 + Pos] = Name - 1; + } + for (int i = N - 1, j = i; i >= N1; --i) + if (SA[i] >= 0) + SA[j--] = SA[i]; + + // Stage 2: Solve the reduced problem. + /// If the names aren't unique enough yet, we recurse until they are. + size_t S1Start = N - N1; + int *S1 = SA.data() + S1Start; + if (Name < N1) + computeSAIS(ArrayRef(S1, N1), N1, Name - 1); + // Otherwise we can compute the suffix array directly. + else { + for (int i = 0; i < N1; ++i) + SA[S1[i]] = i; + } + + // Stage 3: Induce the result from the reduced solution. + Bucket.resize(AlphabetSize + 1); + /// Place the LMS characters into their respective buckets. + getBuckets(S, true, N, AlphabetSize); + /// Get P1. + for (int i = 1, j = 0; i < N; ++i) + if (isLMS(i, LTypeArray)) + S1[j++] = i; + /// Get the index in S. + for (int i = 0; i < N1; ++i) + SA[i] = S1[SA[i]]; + /// Initialize the suffix array from N1 to N - 1. + for (int i = N1; i < N; ++i) + SA[i] = -1; + for (int i = N1 - 1; i >= 0; --i) { + int j = SA[i]; + SA[i] = -1; + SA[--Bucket[S[j]]] = j; + } + induceSA(S, LTypeArray, N, AlphabetSize); + } + + // Check to see if S(Idx) is a left most S-type character. + bool isLMS(int Idx, BitVector <ypeArray) { + return Idx > 0 && LTypeArray.test(Idx) && !LTypeArray.test(Idx - 1); + } + template + void getBuckets(ArrayRef S, bool End, unsigned N, unsigned AlphabetSize) { + /// Clear buckets. + Bucket.assign(AlphabetSize + 1, 0); + /// Compute the size of each bucket. + for (size_t i = 0, e = S.size(); i < e; ++i) + ++Bucket[S[i]]; + int Sum = 0; + if (!End) { + for (size_t i = 0, e = AlphabetSize + 1; i < e; ++i) { + Sum += Bucket[i]; + Bucket[i] = Sum - Bucket[i]; + } + } else + for (size_t i = 0; i <= AlphabetSize; ++i) + Bucket[i] = Sum += Bucket[i]; + } + + // Compute SA1 + template + void induceSA(ArrayRef S, BitVector <ypeArray, unsigned N, + unsigned AlphabetSize) { + // Induce SA1 + getBuckets(S, false, N, AlphabetSize); + for (size_t i = 0; i < N; ++i) { + int j = SA[i] - 1; + if (j >= 0 && !LTypeArray.test(j)) + SA[Bucket[S[j]]++] = j; + } + // Induce Sas + getBuckets(S, true, N, AlphabetSize); + for (ssize_t i = N - 1; i >= 0; --i) { + int j = SA[i] - 1; + if (j >= 0 && LTypeArray.test(j)) + SA[--Bucket[S[j]]] = j; + } + } + std::vector &SA; + std::vector Bucket; +}; +// Construct the LCP array for a given suffix array /p SA and string /p S. +static std::vector computeLCP(ArrayRef S, ArrayRef SA) { + int N = S.size(); + std::vector LCP(N), Rank(N); + for (int i = 0; i < N; ++i) + Rank[SA[i]] = i; + for (int i = 0, k = 0; i < N; ++i) { + if (Rank[i] == N - 1) { + k = 0; + continue; + } + int j = SA[Rank[i] + 1]; + while (i + k < N && j + k < N && S[i + k] == S[j + k]) + ++k; + LCP[Rank[i]] = k; + if (k > 0) + --k; + } + return LCP; +} +} // namespace + +void SequencedOutlinerImpl::OutlinerMapper::prepareForCandidateSelection() { + // We regroup the illegal indices so that our alphabet is of a defined size. + unsigned Diff = (UINT_MAX - IllegalID); + for (unsigned &InstId : CCVec) { + if (InstId > IllegalID) + InstId = 1 + (UINT_MAX - InstId); + else + InstId += Diff; + } + CCID += Diff; + + // REQUIRED: N-1 must be 0 to act as a sentinel for the suffix array + // algorithm. + CCVec.push_back(0); +} + +void SequencedOutlinerImpl::findSTCandidates(std::vector &CL) { + ArrayRef CongruencyVec = OM.getCongruencyVector(); + SuffixTree ST(CongruencyVec); + + // An interval tree of our current candidates. + BitVector CandidateInterval(CongruencyVec.size()); + + std::vector Occurrences; + // FIXME: Visit internal nodes instead of leaves. + for (SuffixTreeNode *Leaf : ST.LeafVector) { + assert(Leaf && "Leaves in LeafVector cannot be null!"); + if (!Leaf->IsInTree) + continue; + + assert(Leaf->Parent && "All leaves must have parents!"); + SuffixTreeNode &Parent = *(Leaf->Parent); + + // If it doesn't appear enough, or we already outlined from it, skip it. + if (Parent.OccurrenceCount < MinOccurrences || + Parent.isRoot() || !Parent.IsInTree) + continue; + + // How many instructions would outlining this string save? + size_t StringLen = Leaf->ConcatLen - Leaf->size(); + + Occurrences.clear(); + for (auto &ChildPair : Parent.Children) { + SuffixTreeNode *M = ChildPair.second; + unsigned Idx = M->SuffixIdx; + + // Is it a leaf? If so, we have an occurrence of this candidate. + if (M && M->IsInTree && M->isLeaf() + && CandidateInterval.find_first_in(Idx, Idx + StringLen) == -1) { + CandidateInterval.set(Idx, Idx + StringLen); + Occurrences.push_back(Idx); + M->IsInTree = false; + } + } + for(unsigned Idx : CandidateInterval.set_bits()) + CandidateInterval.reset(Idx, Idx + StringLen); + + // If it's not beneficial, skip it. + if (prePrune(Occurrences, StringLen)) + continue; + + CL.emplace_back(StringLen, Occurrences); + + // Move to the next function. + Parent.IsInTree = false; + } +} +void SequencedOutlinerImpl::findSACandidates(std::vector &CL) { + // Build the suffix array and longest common prefix array. + ArrayRef CongruencyVec = OM.getCongruencyVector(); + std::vector SuffixArr, LcpArr; + SuffixArray::compute(CongruencyVec, SuffixArr, OM.getNumCongruencyClasses()); + LcpArr = computeLCP(CongruencyVec, SuffixArr); + + // An interval tree of our current candidates. + BitVector CandidateInterval(CongruencyVec.size()); + + // Try to guess the amount of candidates we could have for this module. + // * Tuned via clang build * + size_t NumPotentialOccurrences = 0, CurrentSizeSeq = 1; + std::for_each(LcpArr.begin(), LcpArr.end(), [&](unsigned Size) { + if (Size >= MinInstructionLen) + NumPotentialOccurrences += ++CurrentSizeSeq; + else + CurrentSizeSeq = 1; + }); + CL.reserve(NumPotentialOccurrences * 0.025); + + // Walk the suffix array to build potential candidates. + SmallDenseSet FailedOccurrences; + size_t PrevSize = 0; + std::vector Occurrences; + for (size_t i = 1, e = SuffixArr.size(); i < e; ++i) { + size_t Size = LcpArr[i]; + + // Preskip invalid size. + if (Size < MinInstructionLen) { + PrevSize = 0; + continue; + } + + size_t OccurIdx = SuffixArr[i]; + + // We have already matched against this size. + if (PrevSize >= Size) { + PrevSize = Size; + continue; + } + + // Create a new interval tree with our current candidate to pre prune + // overlaps. + Occurrences.clear(); + CandidateInterval.set(OccurIdx, OccurIdx + Size); + Occurrences.push_back(OccurIdx); + FailedOccurrences.clear(); + bool HasPreviousSharedOccurrence = false; + + // Continuously consider potentital chain sizes for this candidate until + // they are no longer profitable. + size_t OrigSize = Size, LastValidSize = 0; + for (size_t SizeFromIdx = i, AugmentAmount = 0; + Size >= MinInstructionLen;) { + bool AddedNewOccurrence = false; + + // Augment the candidate set by the change in size from the + // last iteration. + if (AugmentAmount > 0) + for (size_t Idx : Occurrences) + CandidateInterval.reset(Idx + Size, Idx + Size + AugmentAmount); + LastValidSize = Size; + + // After augmenting the candidate set, there may be new candidates that + // no longer overlap with any of the others currently being considered. + for (auto It = FailedOccurrences.begin(), E = FailedOccurrences.end(); + It != E;) { + size_t Idx = *It; + ++It; + if (CandidateInterval.find_first_in(Idx, Idx + Size) != -1) + continue; + FailedOccurrences.erase(Idx); + CandidateInterval.set(Idx, Idx + Size); + Occurrences.push_back(Idx); + AddedNewOccurrence = true; + } + + // Count the number of occurrences. + for (size_t j = i + Occurrences.size(); j < e; ++j) { + // The longest common prefix must be able to hold our size. + if ((size_t)LcpArr[j - 1] < Size) + break; + + // Check to see if this candidate overlaps with any of our currently + // considered candidates. If it doesn't we add it to our current set. + size_t JIdx = SuffixArr[j]; + if (CandidateInterval.find_first_in(JIdx, JIdx + Size) == -1) { + CandidateInterval.set(JIdx, JIdx + Size); + Occurrences.push_back(JIdx); + AddedNewOccurrence = true; + } else + FailedOccurrences.insert(JIdx); + + // If our next size is less than the current, we won't get any more + // candidates for this chain. + if ((size_t)LcpArr[j] < Size) + break; + } + + // If we added a new candidate and we have enough to satisfy our + // constraints then we build a new outline chain candidate. + if (AddedNewOccurrence && Occurrences.size() >= MinOccurrences) { + // Recheck the prune size each iteration. + if (!prePrune(Occurrences, Size)) { + /// Cache shared sizes between candidates chains to make analysis + /// easier. + if (HasPreviousSharedOccurrence) + CL.back().SharedSizeWithNext = Size; + else + HasPreviousSharedOccurrence = true; + /// Build new function with candidate sequence. + CL.emplace_back(Size, Occurrences); + } + } + + // Find the next size to consider for this candidate. + for (size_t NewSizeE = e - 1; ++SizeFromIdx < NewSizeE;) { + size_t NewSize = static_cast(LcpArr[SizeFromIdx]); + if (NewSize < Size) { + AugmentAmount = Size - NewSize; + Size = NewSize; + break; + } + } + + // If we have already encountered a greater size, then the new size + // was either invalid or we've already considered this size but + // with more candidates. + if (Size == LastValidSize || Size <= PrevSize) + break; + } + for (unsigned Idx : Occurrences) + CandidateInterval.reset(Idx, Idx + LastValidSize); + PrevSize = OrigSize; + } +} +void SequencedOutlinerImpl::findOutliningOccurrences( + std::vector &CL) { + OM.prepareForCandidateSelection(); + CL.clear(); + if(CSM == CSM_SuffixArray) + findSACandidates(CL); + else if(CSM == CSM_SuffixTree) + findSTCandidates(CL); + else + llvm_unreachable("Unknown outliner candidate selection method."); +} + +bool SequencedOutlinerImpl::pruneAndOutline(std::vector &CL) { + // Helper comparator for candidate indexes. + struct Comparator { + Comparator(ArrayRef CL) : CL(CL) {} + bool operator()(unsigned L, unsigned R) { + return CL[L].Benefit < CL[R].Benefit; + } + ArrayRef CL; + }; + + // Build a priority worklist for the valid candidates. + std::vector OriginalSetOfValidCands; + OriginalSetOfValidCands.reserve(CL.size()); + for (unsigned i = 0, e = CL.size(); i < e; ++i) + if (CL[i].isValid()) + OriginalSetOfValidCands.push_back(i); + + Comparator C(CL); + PriorityQueue, Comparator> MostBenefitial( + C, OriginalSetOfValidCands); + BitVector InsertedOccurrences(OM.getNumMappedInstructions()); + BitVector ValidOccurrencesPerCandidate; + size_t OutlinedCandidates = 0; + while (!MostBenefitial.empty()) { + unsigned CandIdx = MostBenefitial.top(); + MostBenefitial.pop(); + + OutlineCandidate &OC = CL[CandIdx]; + ValidOccurrencesPerCandidate.reset(); + ValidOccurrencesPerCandidate.resize(OC.size()); + + // Check overlaps. + for (ssize_t i = OC.size() - 1; i >= 0; --i) { + unsigned Occur = OC.getOccurrence(i); + if (InsertedOccurrences.find_first_in(Occur, Occur + OC.Len) == -1) { + ValidOccurrencesPerCandidate.set(i); + continue; + } + if (OC.Benefit < OC.BenefitPerOccur) { + OC.invalidate(); + break; + } + OC.Benefit -= OC.BenefitPerOccur; + OC.removeOccurrence(i); + } + + // Add valid occurrences if this candidate is still profitable. + if (!OC.isValid()) + continue; + + // If we have a cheap benefit function then we update the benefit + // to get the candidate that is actually the best. + if (ValidOccurrencesPerCandidate.size() != OC.size()) { + MostBenefitial.push(CandIdx); + continue; + } + + // Outline the remaining valid occurrences from this candidate. + OutlineFn(OC, OutlinedCandidates++); + for (unsigned OccurIdx : ValidOccurrencesPerCandidate.set_bits()) { + unsigned Occur = OC.getOccurrence(OccurIdx); + InsertedOccurrences.set(Occur, Occur + OC.Len); + } + } + return OutlinedCandidates > 0; +}