Index: include/llvm/Target/TargetInstrInfo.h =================================================================== --- include/llvm/Target/TargetInstrInfo.h +++ include/llvm/Target/TargetInstrInfo.h @@ -331,7 +331,8 @@ /// /// \p Orig must not return true for MachineInstr::isNotDuplicable(). virtual MachineInstr &duplicate(MachineBasicBlock &MBB, - MachineBasicBlock::iterator InsertBefore, const MachineInstr &Orig) const; + MachineBasicBlock::iterator InsertBefore, + const MachineInstr &Orig) const; /// This method must be implemented by targets that /// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target @@ -1561,33 +1562,41 @@ return false; } - /// \brief Returns the number of instructions that will be taken to call a - /// function defined by the sequence on the closed interval [ \p StartIt, \p - /// EndIt]. - /// - /// \returns The number of instructions for the call in the first member, - /// and a target-defined unsigned describing what type of call to emit in the - /// second member. - virtual std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { - llvm_unreachable( - "Target didn't implement TargetInstrInfo::getOutliningCallOverhead!"); - } + /// \brief Describes the number of instructions that it will take to call and + /// construct a frame for a given outlining candidate. + struct MachineOutlinerInfo { + /// Number of instructions to call an outlined function for this candidate. + unsigned CallOverhead; - /// \brief Returns the number of instructions that will be taken to construct - /// an outlined function frame for a function defined on the closed interval - /// [ \p StartIt, \p EndIt]. - /// - /// \returns The number of instructions for the frame in the first member - /// and a target-defined unsigned describing what type of frame to construct - /// in the second member. - virtual std::pair getOutliningFrameOverhead( + /// \brief Number of instructions to construct an outlined function frame + /// for this candidate. + unsigned FrameOverhead; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a call to this candidate. + unsigned CallConstructionID; + + /// \brief Represents the specific instructions that must be emitted to + /// construct a frame for this candidate's outlined function. + unsigned FrameConstructionID; + + MachineOutlinerInfo() {} + MachineOutlinerInfo(unsigned CallOverhead, unsigned FrameOverhead, + unsigned CallConstructionID, + unsigned FrameConstructionID) + : CallOverhead(CallOverhead), FrameOverhead(FrameOverhead), + CallConstructionID(CallConstructionID), + FrameConstructionID(FrameConstructionID) {} + }; + + /// \brief Returns a \p MachineOutlinerInfo struct containing target-specific + /// information for a set of outlining candidates. + virtual MachineOutlinerInfo getOutlininingCandidateInfo( std::vector< std::pair> - &CandidateClass) const { + &RepeatedSequenceLocs) const { llvm_unreachable( - "Target didn't implement TargetInstrInfo::getOutliningFrameOverhead!"); + "Target didn't implement TargetInstrInfo::getOutliningOverhead!"); } /// Represents how an instruction should be mapped by the outliner. @@ -1608,7 +1617,7 @@ /// emitted. virtual void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const { + const MachineOutlinerInfo &MInfo) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinerEpilogue!"); } @@ -1619,7 +1628,7 @@ virtual MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - unsigned CallClass) const { + const MachineOutlinerInfo &MInfo) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinedCall!"); } @@ -1628,13 +1637,15 @@ /// This may be empty, in which case no prologue will be emitted. virtual void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const { + const MachineOutlinerInfo &MInfo) const { llvm_unreachable( "Target didn't implement TargetInstrInfo::insertOutlinerPrologue!"); } /// Return true if the function can safely be outlined from. - /// By default, this means that the function has no red zone. + /// A function \p MF is considered safe for outlining if an outlined function + /// produced from instructions in F will produce a program which produces the + /// same output for any set of given inputs. virtual bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const { llvm_unreachable("Target didn't implement " "TargetInstrInfo::isFunctionSafeToOutlineFrom!"); Index: lib/CodeGen/MachineOutliner.cpp =================================================================== --- lib/CodeGen/MachineOutliner.cpp +++ lib/CodeGen/MachineOutliner.cpp @@ -15,6 +15,23 @@ /// instructions. If a sequence of instructions appears often, then it ought /// to be beneficial to pull out into a function. /// +/// The MachineOutliner communicates with a given target using hooks defined in +/// TargetInstrInfo.h. The target supplies the outliner with information on how +/// a specific sequence of instructions should be outlined. This information +/// is used to deduce the number of instructions necessary to +/// +/// * Create an outlined function +/// * Call that outlined function +/// +/// Targets must implement +/// * getOutliningCandidateInfo +/// * insertOutlinerEpilogue +/// * insertOutlinedCall +/// * insertOutlinerPrologue +/// * isFunctionSafeToOutlineFrom +/// +/// in order to make use of the MachineOutliner. +/// /// This was originally presented at the 2016 LLVM Developers' Meeting in the /// talk "Reducing Code Size Using Outlining". For a high-level overview of /// how this pass works, the talk is available on YouTube at @@ -80,17 +97,17 @@ bool InCandidateList = true; /// The start index of this \p Candidate. - size_t StartIdx; + unsigned StartIdx; /// The number of instructions in this \p Candidate. - size_t Len; + unsigned Len; /// The index of this \p Candidate's \p OutlinedFunction in the list of /// \p OutlinedFunctions. - size_t FunctionIdx; + unsigned FunctionIdx; - /// Target-defined unsigned defining how to emit a call for this candidate. - unsigned CallClass = 0; + /// Contains all target-specific information for this \p Candidate. + TargetInstrInfo::MachineOutlinerInfo MInfo; /// \brief The number of instructions that would be saved by outlining every /// candidate of this type. @@ -101,9 +118,8 @@ /// for some given candidate. unsigned Benefit = 0; - Candidate(size_t StartIdx, size_t Len, size_t FunctionIdx, unsigned CallClass) - : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx), - CallClass(CallClass) {} + Candidate(unsigned StartIdx, unsigned Len, unsigned FunctionIdx) + : StartIdx(StartIdx), Len(Len), FunctionIdx(FunctionIdx) {} Candidate() {} @@ -120,11 +136,11 @@ /// This is initialized after we go through and create the actual function. MachineFunction *MF = nullptr; - /// A numbefr assigned to this function which appears at the end of its name. - size_t Name; + /// A number assigned to this function which appears at the end of its name. + unsigned Name; /// The number of candidates for this OutlinedFunction. - size_t OccurrenceCount = 0; + unsigned OccurrenceCount = 0; /// \brief The sequence of integers corresponding to the instructions in this /// function. @@ -133,18 +149,18 @@ /// The number of instructions this function would save. unsigned Benefit = 0; - /// Target-defined unsigned defining how to emit the frame for this function. - unsigned FrameClass = 0; + /// Contains all target-specific information for this \p OutlinedFunction. + TargetInstrInfo::MachineOutlinerInfo MInfo; - OutlinedFunction(size_t Name, size_t OccurrenceCount, + OutlinedFunction(unsigned Name, unsigned OccurrenceCount, const std::vector &Sequence, unsigned Benefit, - unsigned FrameClass) + TargetInstrInfo::MachineOutlinerInfo &MInfo) : Name(Name), OccurrenceCount(OccurrenceCount), Sequence(Sequence), - Benefit(Benefit), FrameClass(FrameClass) {} + Benefit(Benefit), MInfo(MInfo) {} }; /// Represents an undefined index in the suffix tree. -const size_t EmptyIdx = -1; +const unsigned EmptyIdx = -1; /// A node in a suffix tree which represents a substring or suffix. /// @@ -174,7 +190,7 @@ bool IsInTree = true; /// The start index of this node's substring in the main string. - size_t StartIdx = EmptyIdx; + unsigned StartIdx = EmptyIdx; /// The end index of this node's substring in the main string. /// @@ -182,12 +198,12 @@ /// step in the construction algorithm. To avoid having to update O(N) /// nodes individually at the end of every step, the end index is stored /// as a pointer. - size_t *EndIdx = nullptr; + unsigned *EndIdx = nullptr; /// For leaves, the start index of the suffix represented by this node. /// /// For all other nodes, this is ignored. - size_t SuffixIdx = EmptyIdx; + unsigned SuffixIdx = EmptyIdx; /// \brief For internal nodes, a pointer to the internal node representing /// the same sequence with the first character chopped off. @@ -216,11 +232,11 @@ /// /// This is equal to the number of leaf children of the string. It represents /// the number of suffixes that the node's string is a prefix of. - size_t OccurrenceCount = 0; + unsigned OccurrenceCount = 0; /// The length of the string formed by concatenating the edge labels from the /// root to this node. - size_t ConcatLen = 0; + unsigned ConcatLen = 0; /// Returns true if this node is a leaf. bool isLeaf() const { return SuffixIdx != EmptyIdx; } @@ -242,7 +258,7 @@ return *EndIdx - StartIdx + 1; } - SuffixTreeNode(size_t StartIdx, size_t *EndIdx, SuffixTreeNode *Link, + SuffixTreeNode(unsigned StartIdx, unsigned *EndIdx, SuffixTreeNode *Link, SuffixTreeNode *Parent) : StartIdx(StartIdx), EndIdx(EndIdx), Link(Link), Parent(Parent) {} @@ -301,7 +317,7 @@ BumpPtrAllocator InternalEndIdxAllocator; /// The end index of each leaf in the tree. - size_t LeafEndIdx = -1; + unsigned LeafEndIdx = -1; /// \brief Helper struct which keeps track of the next insertion point in /// Ukkonen's algorithm. @@ -310,10 +326,10 @@ SuffixTreeNode *Node; /// The index of the first character in the substring currently being added. - size_t Idx = EmptyIdx; + unsigned Idx = EmptyIdx; /// The length of the substring we have to add at the current step. - size_t Len = 0; + unsigned Len = 0; }; /// \brief The point the next insertion will take place at in the @@ -327,7 +343,7 @@ /// \param Edge The label on the edge leaving \p Parent to this node. /// /// \returns A pointer to the allocated leaf node. - SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, size_t StartIdx, + SuffixTreeNode *insertLeaf(SuffixTreeNode &Parent, unsigned StartIdx, unsigned Edge) { assert(StartIdx <= LeafEndIdx && "String can't start after it ends!"); @@ -347,14 +363,14 @@ /// \param Edge The label on the edge leaving \p Parent to this node. /// /// \returns A pointer to the allocated internal node. - SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, size_t StartIdx, - size_t EndIdx, unsigned Edge) { + SuffixTreeNode *insertInternalNode(SuffixTreeNode *Parent, unsigned StartIdx, + unsigned EndIdx, unsigned Edge) { assert(StartIdx <= EndIdx && "String can't start after it ends!"); assert(!(!Parent && StartIdx != EmptyIdx) && "Non-root internal nodes must have parents!"); - size_t *E = new (InternalEndIdxAllocator) size_t(EndIdx); + unsigned *E = new (InternalEndIdxAllocator) unsigned(EndIdx); SuffixTreeNode *N = new (NodeAllocator.Allocate()) SuffixTreeNode(StartIdx, E, Root, Parent); if (Parent) @@ -369,7 +385,7 @@ /// /// \param[in] CurrNode The node currently being visited. /// \param CurrIdx The current index of the string being visited. - void setSuffixIndices(SuffixTreeNode &CurrNode, size_t CurrIdx) { + void setSuffixIndices(SuffixTreeNode &CurrNode, unsigned CurrIdx) { bool IsLeaf = CurrNode.Children.size() == 0 && !CurrNode.isRoot(); @@ -415,7 +431,7 @@ /// /// \returns The number of suffixes that have not been added at the end of /// this step. - unsigned extend(size_t EndIdx, size_t SuffixesToAdd) { + unsigned extend(unsigned EndIdx, unsigned SuffixesToAdd) { SuffixTreeNode *NeedsLink = nullptr; while (SuffixesToAdd > 0) { @@ -447,7 +463,7 @@ // insert a new node. SuffixTreeNode *NextNode = Active.Node->Children[FirstChar]; - size_t SubstringLen = NextNode->size(); + unsigned SubstringLen = NextNode->size(); // Is the current suffix we're trying to insert longer than the size of // the child we want to move to? @@ -543,13 +559,14 @@ // Keep track of the number of suffixes we have to add of the current // prefix. - size_t SuffixesToAdd = 0; + unsigned SuffixesToAdd = 0; Active.Node = Root; // Construct the suffix tree iteratively on each prefix of the string. // PfxEndIdx is the end index of the current prefix. // End is one past the last element in the string. - for (size_t PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; PfxEndIdx++) { + for (unsigned PfxEndIdx = 0, End = Str.size(); PfxEndIdx < End; + PfxEndIdx++) { SuffixesToAdd++; LeafEndIdx = PfxEndIdx; // Extend each of the leaves. SuffixesToAdd = extend(PfxEndIdx, SuffixesToAdd); @@ -747,10 +764,10 @@ /// type of candidate. /// /// \returns The length of the longest candidate found. - size_t findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, - InstructionMapper &Mapper, - std::vector &CandidateList, - std::vector &FunctionList); + unsigned findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, + InstructionMapper &Mapper, + std::vector &CandidateList, + std::vector &FunctionList); /// \brief Replace the sequences of instructions represented by the /// \p Candidates in \p CandidateList with calls to \p MachineFunctions @@ -820,16 +837,15 @@ INITIALIZE_PASS(MachineOutliner, DEBUG_TYPE, "Machine Function Outliner", false, false) -size_t +unsigned MachineOutliner::findCandidates(SuffixTree &ST, const TargetInstrInfo &TII, InstructionMapper &Mapper, std::vector &CandidateList, std::vector &FunctionList) { - CandidateList.clear(); FunctionList.clear(); - size_t FnIdx = 0; - size_t MaxLen = 0; + unsigned FnIdx = 0; + unsigned MaxLen = 0; // FIXME: Visit internal nodes instead of leaves. for (SuffixTreeNode *Leaf : ST.LeafVector) { @@ -845,7 +861,7 @@ continue; // Figure out if this candidate is beneficial. - size_t StringLen = Leaf->ConcatLen - Leaf->size(); + unsigned StringLen = Leaf->ConcatLen - (unsigned)Leaf->size(); // Too short to be beneficial; skip it. // FIXME: This isn't necessarily true for, say, X86. If we factor in @@ -853,18 +869,17 @@ if (StringLen < 2) continue; - size_t CallOverhead = 0; - size_t SequenceOverhead = StringLen; - // If this is a beneficial class of candidate, then every one is stored in // this vector. std::vector CandidatesForRepeatedSeq; - // Used for getOutliningFrameOverhead. + // Describes the start and end point of each candidate. This allows the + // target to infer some information about each occurrence of each repeated + // sequence. // FIXME: CandidatesForRepeatedSeq and this should be combined. std::vector< std::pair> - CandidateClass; + RepeatedSequenceLocs; // Figure out the call overhead for each instance of the sequence. for (auto &ChildPair : Parent.Children) { @@ -876,26 +891,21 @@ MachineBasicBlock::iterator EndIt = Mapper.InstrList[M->SuffixIdx + StringLen - 1]; - // Get the overhead for calling a function for this sequence and any - // target-specified data for how to construct the call. - std::pair CallOverheadPair = - TII.getOutliningCallOverhead(StartIt, EndIt); - CallOverhead += CallOverheadPair.first; - CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx, - CallOverheadPair.second); - CandidateClass.emplace_back(std::make_pair(StartIt, EndIt)); + CandidatesForRepeatedSeq.emplace_back(M->SuffixIdx, StringLen, FnIdx); + RepeatedSequenceLocs.emplace_back(std::make_pair(StartIt, EndIt)); // Never visit this leaf again. M->IsInTree = false; } } - std::pair FrameOverheadPair = - TII.getOutliningFrameOverhead(CandidateClass); - size_t FrameOverhead = FrameOverheadPair.first; + unsigned SequenceOverhead = StringLen; + TargetInstrInfo::MachineOutlinerInfo MInfo = + TII.getOutlininingCandidateInfo(RepeatedSequenceLocs); - size_t OutliningCost = CallOverhead + FrameOverhead + SequenceOverhead; - size_t NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; + unsigned OutliningCost = + (MInfo.CallOverhead * Parent.OccurrenceCount) + MInfo.FrameOverhead; + unsigned NotOutliningCost = SequenceOverhead * Parent.OccurrenceCount; // Is it better to outline this candidate than not? if (NotOutliningCost <= OutliningCost) { @@ -903,14 +913,14 @@ // outlining. // Emit a remark explaining why we didn't outline this candidate. std::pair C = - CandidateClass[0]; + RepeatedSequenceLocs[0]; MachineOptimizationRemarkEmitter MORE( *(C.first->getParent()->getParent()), nullptr); MachineOptimizationRemarkMissed R(DEBUG_TYPE, "NotOutliningCheaper", C.first->getDebugLoc(), C.first->getParent()); R << "Did not outline " << NV("Length", StringLen) << " instructions" - << " from " << NV("NumOccurrences", CandidateClass.size()) + << " from " << NV("NumOccurrences", RepeatedSequenceLocs.size()) << " locations." << " Instructions from outlining all occurrences (" << NV("OutliningCost", OutliningCost) << ")" @@ -919,9 +929,9 @@ << " (Also found at: "; // Tell the user the other places the candidate was found. - for (size_t i = 1, e = CandidateClass.size(); i < e; i++) { + for (unsigned i = 1, e = RepeatedSequenceLocs.size(); i < e; i++) { R << NV((Twine("OtherStartLoc") + Twine(i)).str(), - CandidateClass[i].first->getDebugLoc()); + RepeatedSequenceLocs[i].first->getDebugLoc()); if (i != e - 1) R << ", "; } @@ -933,7 +943,7 @@ continue; } - size_t Benefit = NotOutliningCost - OutliningCost; + unsigned Benefit = NotOutliningCost - OutliningCost; if (StringLen > MaxLen) MaxLen = StringLen; @@ -942,6 +952,7 @@ // benefit values and save them in the candidate list. for (Candidate &C : CandidatesForRepeatedSeq) { C.Benefit = Benefit; + C.MInfo = MInfo; CandidateList.push_back(C); } @@ -951,8 +962,7 @@ CandidateSequence.push_back(ST.Str[i]); FunctionList.emplace_back(FnIdx, CandidatesForRepeatedSeq.size(), - CandidateSequence, Benefit, - FrameOverheadPair.second); + CandidateSequence, Benefit, MInfo); // Move to the next function. FnIdx++; @@ -1023,7 +1033,7 @@ continue; } - size_t C2End = C2.StartIdx + C2.Len - 1; + unsigned C2End = C2.StartIdx + C2.Len - 1; // Do C1 and C2 overlap? // @@ -1050,13 +1060,9 @@ F2.OccurrenceCount--; // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C2.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C2.StartIdx + C2.Len - 1]; + F2.Benefit += C2.MInfo.CallOverhead; - F2.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first; // Add back one instance of the sequence. - if (F2.Sequence.size() > F2.Benefit) F2.Benefit = 0; else @@ -1077,11 +1083,7 @@ F1.OccurrenceCount--; // Remove the call overhead from the removed sequence. - MachineBasicBlock::iterator StartIt = Mapper.InstrList[C1.StartIdx]; - MachineBasicBlock::iterator EndIt = - Mapper.InstrList[C1.StartIdx + C1.Len - 1]; - - F1.Benefit += TII.getOutliningCallOverhead(StartIt, EndIt).first; + F1.Benefit += C1.MInfo.CallOverhead; // Add back one instance of the sequence. if (F1.Sequence.size() > F1.Benefit) @@ -1110,7 +1112,7 @@ const TargetInstrInfo &TII) { std::vector CandidateSequence; // Current outlining candidate. - size_t MaxCandidateLen = 0; // Length of the longest candidate. + unsigned MaxCandidateLen = 0; // Length of the longest candidate. MaxCandidateLen = findCandidates(ST, TII, Mapper, CandidateList, FunctionList); @@ -1157,7 +1159,7 @@ // Insert the new function into the module. MF.insert(MF.begin(), &MBB); - TII.insertOutlinerPrologue(MBB, MF, OF.FrameClass); + TII.insertOutlinerPrologue(MBB, MF, OF.MInfo); // Copy over the instructions for the function using the integer mappings in // its sequence. @@ -1172,7 +1174,7 @@ MBB.insert(MBB.end(), NewMI); } - TII.insertOutlinerEpilogue(MBB, MF, OF.FrameClass); + TII.insertOutlinerEpilogue(MBB, MF, OF.MInfo); return &MF; } @@ -1183,7 +1185,6 @@ InstructionMapper &Mapper) { bool OutlinedSomething = false; - // Replace the candidates with calls to their respective outlined functions. for (const Candidate &C : CandidateList) { @@ -1221,7 +1222,7 @@ const TargetInstrInfo &TII = *STI.getInstrInfo(); // Insert a call to the new function and erase the old sequence. - TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.CallClass); + TII.insertOutlinedCall(M, *MBB, StartIt, *MF, C.MInfo); StartIt = Mapper.InstrList[C.StartIdx]; MBB->erase(StartIt, EndIt); Index: lib/Target/AArch64/AArch64InstrInfo.h =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.h +++ lib/Target/AArch64/AArch64InstrInfo.h @@ -350,24 +350,23 @@ ArrayRef> getSerializableMachineMemOperandTargetFlags() const override; + bool + canOutlineWithoutLRSave(MachineBasicBlock::iterator &CallInsertionPt) const; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; - std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const override; - std::pair getOutliningFrameOverhead( + MachineOutlinerInfo getOutlininingCandidateInfo( std::vector< std::pair> - &CandidateClass) const override; + &RepeatedSequenceLocs) const override; AArch64GenInstrInfo::MachineOutlinerInstrType getOutliningType(MachineInstr &MI) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const override; + const MachineOutlinerInfo &MInfo) const override; void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const override; + const MachineOutlinerInfo &MInfo) const override; MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - unsigned CallClass) const override; + const MachineOutlinerInfo &MInfo) const override; /// Returns true if the instruction has a shift left that can be executed /// more efficiently. bool isExynosShiftLeftFast(const MachineInstr &MI) const; Index: lib/Target/AArch64/AArch64InstrInfo.cpp =================================================================== --- lib/Target/AArch64/AArch64InstrInfo.cpp +++ lib/Target/AArch64/AArch64InstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -4534,38 +4535,124 @@ return makeArrayRef(TargetFlags); } -// Constants defining how certain sequences should be outlined. -const unsigned MachineOutlinerDefaultFn = 0; -const unsigned MachineOutlinerTailCallFn = 1; - -std::pair AArch64InstrInfo::getOutliningCallOverhead( - MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { - // Is this a tail-call? - if (EndIt->isTerminator()) { - // Yes, so we only have to emit a call. Return a cost of 1 + signify that - // this candidate should be tail-called. - return std::make_pair(1, MachineOutlinerTailCallFn); - } +/// Constants defining how certain sequences should be outlined. +/// This encompasses how an outlined function should be called, and what kind of +/// frame should be emitted for that outlined function. +/// +/// \p MachineOutlinerDefault implies that the function should be called with +/// a save and restore of LR to the stack. +/// +/// That is, +/// +/// I1 Save LR OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 Restore LR I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 3 (save + BL + restore) +/// * Frame construction overhead: 1 (ret) +/// * Requires stack fixups? Yes +/// +/// \p MachineOutlinerTailCall implies that the function is being created from +/// a sequence of instructions ending in a return. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> B OUTLINED_FUNCTION I1 +/// RET I2 +/// RET +/// +/// * Call construction overhead: 1 (B) +/// * Frame construction overhead: 0 (Return included in sequence) +/// * Requires stack fixups? No +/// +/// \p MachineOutlinerNoLRSave implies that the function should be called using +/// a BL instruction, but doesn't require LR to be saved and restored. This +/// happens when LR is known to be dead. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> BL OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// RET +/// +/// * Call construction overhead: 1 (BL) +/// * Frame construction overhead: 1 (RET) +/// * Requires stack fixups? No +/// +enum MachineOutlinerClass { + MachineOutlinerDefault, /// Emit a save, restore, call, and return. + MachineOutlinerTailCall, /// Only emit a branch. + MachineOutlinerNoLRSave /// Emit a call and return. +}; - // No, so save + restore LR. - return std::make_pair(3, MachineOutlinerDefaultFn); +bool AArch64InstrInfo::canOutlineWithoutLRSave( + MachineBasicBlock::iterator &CallInsertionPt) const { + // Was LR saved in the function containing this basic block? + MachineBasicBlock &MBB = *(CallInsertionPt->getParent()); + LiveRegUnits LRU(getRegisterInfo()); + LRU.addLiveOuts(MBB); + + // Get liveness information from the end of the block to the end of the + // prospective outlined region. + std::for_each(MBB.rbegin(), + (MachineBasicBlock::reverse_iterator)CallInsertionPt, + [&LRU](MachineInstr &MI) {LRU.stepBackward(MI);} + ); + + // If the link register is available at this point, then we can safely outline + // the region without saving/restoring LR. Otherwise, we must emit a save and + // restore. + return LRU.available(AArch64::LR); } -std::pair AArch64InstrInfo::getOutliningFrameOverhead( - std::vector> &CandidateClass) const { +AArch64GenInstrInfo::MachineOutlinerInfo +AArch64InstrInfo::getOutlininingCandidateInfo( + std::vector< + std::pair> + &RepeatedSequenceLocs) const { + + unsigned CallID = MachineOutlinerDefault; + unsigned FrameID = MachineOutlinerDefault; + unsigned NumInstrsForCall = 3; + unsigned NumInstrsToCreateFrame = 1; + + auto DoesntNeedLRSave = + [this](std::pair + &I) { return canOutlineWithoutLRSave(I.second); }; + + // If the last instruction in any candidate is a terminator, then we should + // tail call all of the candidates. + if (RepeatedSequenceLocs[0].second->isTerminator()) { + CallID = MachineOutlinerTailCall; + FrameID = MachineOutlinerTailCall; + NumInstrsForCall = 1; + NumInstrsToCreateFrame = 0; + } - // Is the last instruction in this class a terminator? - if (CandidateClass[0].second->isTerminator()) - return std::make_pair(0, MachineOutlinerTailCallFn); + else if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(), + DoesntNeedLRSave)) { + CallID = MachineOutlinerNoLRSave; + FrameID = MachineOutlinerNoLRSave; + NumInstrsForCall = 1; + NumInstrsToCreateFrame = 1; + } - // No, so we have to add a return to the end. - return std::make_pair(1, MachineOutlinerDefaultFn); + return MachineOutlinerInfo(NumInstrsForCall, NumInstrsToCreateFrame, CallID, + FrameID); } bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { - return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone); + // If MF has a red zone, then we ought not to outline from it, since outlined + // functions can modify/read from the stack. + // If MF's address is taken, then we don't want to outline from it either + // since we don't really know what the user is doing with it. + return MF.getFunction()->hasFnAttribute(Attribute::NoRedZone) && + !MF.getFunction()->hasAddressTaken(); } AArch64GenInstrInfo::MachineOutlinerInstrType @@ -4607,6 +4694,10 @@ if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() || MOP.isTargetIndex()) return MachineOutlinerInstrType::Illegal; + + // Don't outline anything that uses the link register. + if (MOP.isReg() && getRegisterInfo().regsOverlap(MOP.getReg(), AArch64::LR)) + return MachineOutlinerInstrType::Illegal; } // Does this use the stack? @@ -4676,12 +4767,12 @@ } } -void AArch64InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, - MachineFunction &MF, - unsigned FrameClass) const { +void AArch64InstrInfo::insertOutlinerEpilogue( + MachineBasicBlock &MBB, MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const { // If this is a tail call outlined function, then there's already a return. - if (FrameClass == MachineOutlinerTailCallFn) + if (MInfo.FrameConstructionID == MachineOutlinerTailCall) return; // It's not a tail call, so we have to insert the return ourselves. @@ -4689,28 +4780,40 @@ .addReg(AArch64::LR, RegState::Undef); MBB.insert(MBB.end(), ret); + // Did we have to modify the stack by saving the link register? + if (MInfo.FrameConstructionID == MachineOutlinerNoLRSave) + return; + + // We modified the stack. // Walk over the basic block and fix up all the stack accesses. fixupPostOutline(MBB); } -void AArch64InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, - MachineFunction &MF, - unsigned FrameClass) const {} +void AArch64InstrInfo::insertOutlinerPrologue( + MachineBasicBlock &MBB, MachineFunction &MF, + const MachineOutlinerInfo &MInfo) const {} MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall( Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, - MachineFunction &MF, unsigned CallClass) const { + MachineFunction &MF, const MachineOutlinerInfo &MInfo) const { // Are we tail calling? - if (CallClass == MachineOutlinerTailCallFn) { + if (MInfo.CallConstructionID == MachineOutlinerTailCall) { // If yes, then we can just branch to the label. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::B)) .addGlobalAddress(M.getNamedValue(MF.getName()))); return It; } - // We're not tail calling, so we have to save LR before the call and restore - // it after. + // Are we saving the link register? + if (MInfo.CallConstructionID == MachineOutlinerNoLRSave) { + // No, so just insert the call. + It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL)) + .addGlobalAddress(M.getNamedValue(MF.getName()))); + return It; + } + + // We have a default call. Save the link register. MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre)) .addReg(AArch64::SP, RegState::Define) .addReg(AArch64::LR) Index: lib/Target/X86/X86InstrInfo.h =================================================================== --- lib/Target/X86/X86InstrInfo.h +++ lib/Target/X86/X86InstrInfo.h @@ -559,14 +559,10 @@ ArrayRef> getSerializableDirectMachineOperandTargetFlags() const override; - std::pair - getOutliningCallOverhead(MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const override; - - std::pair getOutliningFrameOverhead( + virtual MachineOutlinerInfo getOutlininingCandidateInfo( std::vector< std::pair> - &CandidateClass) const override; + &RepeatedSequenceLocs) const override; bool isFunctionSafeToOutlineFrom(MachineFunction &MF) const override; @@ -574,15 +570,15 @@ getOutliningType(MachineInstr &MI) const override; void insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const override; + const MachineOutlinerInfo &MInfo) const override; void insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const override; + const MachineOutlinerInfo &MInfo) const override; MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - unsigned CallClass) const override; + const MachineOutlinerInfo &MInfo) const override; protected: /// Commutes the operands in the given instruction by changing the operands Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -10664,31 +10664,54 @@ FunctionPass* llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); } -// Constants defining how certain sequences should be outlined. -const unsigned MachineOutlinerDefaultFn = 0; -const unsigned MachineOutlinerTailCallFn = 1; - -std::pair X86InstrInfo::getOutliningCallOverhead( - MachineBasicBlock::iterator &StartIt, - MachineBasicBlock::iterator &EndIt) const { - - // Is this a tail call? If it is, make note of it. - if (EndIt->isTerminator()) - return std::make_pair(1, MachineOutlinerTailCallFn); - - return std::make_pair(1, MachineOutlinerDefaultFn); -} - -std::pair X86InstrInfo::getOutliningFrameOverhead( - std::vector> &CandidateClass) const { - // Is this a tail-call? - // Is the last instruction in this class a terminator? - if (CandidateClass[0].second->isTerminator()) - return std::make_pair(0, MachineOutlinerTailCallFn); +/// Constants defining how certain sequences should be outlined. +/// +/// \p MachineOutlinerDefault implies that the function is called with a call +/// instruction, and a return must be emitted for the outlined function frame. +/// +/// That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> call OUTLINED_FUNCTION I1 +/// I3 I2 +/// I3 +/// ret +/// +/// * Call construction overhead: 1 (call instruction) +/// * Frame construction overhead: 1 (return instruction) +/// +/// \p MachineOutlinerTailCall implies that the function is being tail called. +/// A jump is emitted instead of a call, and the return is already present in +/// the outlined sequence. That is, +/// +/// I1 OUTLINED_FUNCTION: +/// I2 --> jmp OUTLINED_FUNCTION I1 +/// ret I2 +/// ret +/// +/// * Call construction overhead: 1 (jump instruction) +/// * Frame construction overhead: 0 (don't need to return) +/// +enum MachineOutlinerClass { + MachineOutlinerDefault, + MachineOutlinerTailCall +}; - // No, so we have to add a return to the end. - return std::make_pair(1, MachineOutlinerDefaultFn); +X86GenInstrInfo::MachineOutlinerInfo +X86InstrInfo::getOutlininingCandidateInfo( + std::vector< + std::pair> + &RepeatedSequenceLocs) const { + + if (RepeatedSequenceLocs[0].second->isTerminator()) + return MachineOutlinerInfo(1, // Number of instructions to emit call. + 0, // Number of instructions to emit frame. + MachineOutlinerTailCall, // Type of call. + MachineOutlinerTailCall // Type of frame. + ); + + return MachineOutlinerInfo(1, 1, MachineOutlinerDefault, + MachineOutlinerDefault); } bool X86InstrInfo::isFunctionSafeToOutlineFrom(MachineFunction &MF) const { @@ -10752,10 +10775,10 @@ void X86InstrInfo::insertOutlinerEpilogue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const { - + const MachineOutlinerInfo &MInfo) + const { // If we're a tail call, we already have a return, so don't do anything. - if (FrameClass == MachineOutlinerTailCallFn) + if (MInfo.FrameConstructionID == MachineOutlinerTailCall) return; // We're a normal call, so our sequence doesn't have a return instruction. @@ -10766,15 +10789,16 @@ void X86InstrInfo::insertOutlinerPrologue(MachineBasicBlock &MBB, MachineFunction &MF, - unsigned FrameClass) const {} + const MachineOutlinerInfo &MInfo) + const {} MachineBasicBlock::iterator X86InstrInfo::insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, - unsigned CallClass) const { + const MachineOutlinerInfo &MInfo) const { // Is it a tail call? - if (CallClass == MachineOutlinerTailCallFn) { + if (MInfo.CallConstructionID == MachineOutlinerTailCall) { // Yes, just insert a JMP. It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(X86::JMP_1)) Index: test/CodeGen/AArch64/machine-outliner-remarks.ll =================================================================== --- test/CodeGen/AArch64/machine-outliner-remarks.ll +++ test/CodeGen/AArch64/machine-outliner-remarks.ll @@ -1,7 +1,7 @@ ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -pass-remarks-missed=machine-outliner -o /dev/null 2>&1 | FileCheck %s ; CHECK: machine-outliner-remarks.ll:5:9: ; CHECK-SAME: Did not outline 2 instructions from 2 locations. -; CHECK-SAME: Instructions from outlining all occurrences (9) >= +; CHECK-SAME: Instructions from outlining all occurrences (7) >= ; CHECK-SAME: Unoutlined instruction count (4) ; CHECK-SAME: (Also found at: machine-outliner-remarks.ll:13:9) ; RUN: llc %s -enable-machine-outliner -mtriple=aarch64-unknown-unknown -o /dev/null -pass-remarks-missed=machine-outliner -pass-remarks-output=%t.yaml @@ -19,7 +19,7 @@ ; YAML-NEXT: - NumOccurrences: '2' ; YAML-NEXT: - String: ' locations.' ; YAML-NEXT: - String: ' Instructions from outlining all occurrences (' -; YAML-NEXT: - OutliningCost: '9' +; YAML-NEXT: - OutliningCost: '7' ; YAML-NEXT: - String: ')' ; YAML-NEXT: - String: ' >= Unoutlined instruction count (' ; YAML-NEXT: - NotOutliningCost: '4' Index: test/CodeGen/AArch64/machine-outliner.mir =================================================================== --- test/CodeGen/AArch64/machine-outliner.mir +++ test/CodeGen/AArch64/machine-outliner.mir @@ -1,42 +1,38 @@ # RUN: llc -mtriple=aarch64--- -run-pass=machine-outliner %s -o - | FileCheck %s --- | - target triple = "aarch64---" - + define i32 @main() #0 { - entry: ret i32 0 } - attributes #0 = { noinline noredzone nounwind optnone ssp uwtable } - -# CHECK-LABEL: @OUTLINED_FUNCTION_0 - + define void @bar(i32 %a) #0 { + ret void + } + + attributes #0 = { noinline noredzone "no-frame-pointer-elim"="true" } ... --- # This test ensures that we # - Create outlined functions # - Don't outline anything to do with LR or W30 +# - Save LR when it's not available # # CHECK-LABEL: name: main -# CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 -# CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 -# CHECK: BL @OUTLINED_FUNCTION_0 -# CHECK: STRHHroW %w16, %x9, %w30, 1, 1 -# CHECK: %lr = ORRXri %xzr, 1 +# CHECK: BL @OUTLINED_FUNCTION_[[F0:[0-9]+]] +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 +# CHECK: BL @OUTLINED_FUNCTION_[[F0]] +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 +# CHECK: BL @OUTLINED_FUNCTION_[[F0]] +# CHECK-NEXT: early-clobber %sp, %lr = LDRXpost %sp, 16 +# CHECK-NEXT: STRHHroW %w16, %x9, %w30, 1, 1 +# CHECK-NEXT: %lr = ORRXri %xzr, 1 name: main -alignment: 2 -tracksRegLiveness: true -frameInfo: - stackSize: 16 - maxAlignment: 4 - maxCallFrameSize: 0 - body: | - bb.0.entry: + bb.0: %sp = frame-setup SUBXri %sp, 16, 0 %x9 = ORRXri %xzr, 1 %w16 = ORRWri %wzr, 1 @@ -74,8 +70,43 @@ STRHHroW %w16, %x9, %w30, 1, 1 %lr = ORRXri %xzr, 1 - %w5 = ORRWri %wzr, 1995 - %sp = ADDXri %sp, 16, 0 RET undef %lr - \ No newline at end of file + +... +--- +# This test ensures that we can avoid saving LR when it's available. +# CHECK-LABEL: bb.1: +# CHECK: BL @OUTLINED_FUNCTION_[[F1:[0-9]+]], implicit-def %lr, implicit %sp +# CHECK-NEXT: %w17 = ORRWri %wzr, 2 +# CHECK-NEXT: BL @OUTLINED_FUNCTION_[[F1]], implicit-def %lr, implicit %sp +# CHECK-NEXT: %w8 = ORRWri %wzr, 0 +name: bar +tracksRegLiveness: true +body: | + bb.0: + liveins: %w0, %lr, %w8 + %sp = frame-setup SUBXri %sp, 32, 0 + %fp = frame-setup ADDXri %sp, 16, 0 + + bb.1: + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 2 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w17 = ORRWri %wzr, 1 + %w8 = ORRWri %wzr, 0 + + bb.2: + %fp, %lr = LDPXi %sp, 2 + %sp = ADDXri %sp, 32, 0 + RET undef %lr + +... + +# CHECK-LABEL: name: OUTLINED_FUNCTION_{{[0-9]}} +# CHECK=LABEL: name: OUTLINED_FUNCTION_{{[1-9]}}