diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -7,10 +7,17 @@ //===----------------------------------------------------------------------===// /// \file /// Contains a collection of routines for determining if a given instruction is -/// guaranteed to execute if a given point in control flow is reached. The most +/// guaranteed to execute if a given point in control flow is reached. The most /// common example is an instruction within a loop being provably executed if we /// branch to the header of it's containing loop. /// +/// There are two interfaces available to determine if an instruction is +/// executed once a given point in the control flow is reached: +/// 1) A loop-centric one derived from LoopSafetyInfo. +/// 2) A "must be executed context"-based one implemented in the +/// MustBeExecutedContextExplorer. +/// Please refer to the class comments for more information. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H @@ -211,6 +218,261 @@ virtual ~ICFLoopSafetyInfo() {}; }; -} +/// Forward declaration. +struct MustBeExecutedContextExplorer; + +/// Must be executed iterators visit stretches of instructions that are +/// guaranteed to be executed together, potentially with other instruction +/// executed in-between. +/// +/// Given the following code, and assuming all statements are single +/// instructions which transfer execution to the successor (see +/// isGuaranteedToTransferExecutionToSuccessor), there are two possible +/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B, +/// (and E). If we start at C or D, we will visit all instructions A-E. +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// \endcode +/// +/// +/// Below is the example extneded with instructions F and G. Now we assume F +/// might not transfer execution to it's successor G. As a result we get the +/// following visit sets: +/// +/// Start Instruction | Visit Set +/// A | A, B, +/// B, | B, +/// C | C, D, E, F +/// D | D, E, F +/// E | E, F +/// F | F +/// G | G +/// +/// +/// \code +/// A; +/// B; +/// if (...) { +/// C; +/// D; +/// } +/// E; +/// F; // Might not transfer execution to its successor G. +/// G; +/// \endcode +/// +/// +/// Note that the examples show optimal visist sets but not necessarily the ones +/// derived by the explorer depending on the available CFG analyses (see +/// MustBeExecutedContextExplorer). Also note that we, depending on the options, +/// the visit set can contain instructions from other functions. +struct MustBeExecutedIterator { + /// Type declarations that make his class an input iterator. + ///{ + typedef const Instruction *value_type; + typedef std::ptrdiff_t difference_type; + typedef const Instruction **pointer; + typedef const Instruction *&reference; + typedef std::input_iterator_tag iterator_category; + ///} + + using ExplorerTy = MustBeExecutedContextExplorer; + + MustBeExecutedIterator(const MustBeExecutedIterator &Other) + : Visited(Other.Visited), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator(MustBeExecutedIterator &&Other) + : Visited(std::move(Other.Visited)), Explorer(Other.Explorer), + CurInst(Other.CurInst) {} + + MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) { + if (this != &Other) { + std::swap(Visited, Other.Visited); + std::swap(CurInst, Other.CurInst); + } + return *this; + } + + ~MustBeExecutedIterator() {} + + /// Pre- and post-increment operators. + ///{ + MustBeExecutedIterator &operator++() { + CurInst = advance(); + return *this; + } + + MustBeExecutedIterator operator++(int) { + MustBeExecutedIterator tmp(*this); + operator++(); + return tmp; + } + ///} + + /// Equality and inequality operators. Note that we ignore the history here. + ///{ + bool operator==(const MustBeExecutedIterator &Other) const { + return CurInst == Other.CurInst; + } + + bool operator!=(const MustBeExecutedIterator &Other) const { + return !(*this == Other); + } + ///} + + /// Return the underlying instruction. + const Instruction *&operator*() { return CurInst; } + const Instruction *getCurrentInst() const { return CurInst; } + + /// Return true if \p I was encountered by this iterator already. + bool count(const Instruction *I) const { return Visited.count(I); } + +private: + using VisitedSetTy = DenseSet; + + /// Private constructors. + MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I); + + /// Reset the iterator to its initial state pointing at \p I. + void reset(const Instruction *I); + + /// Try to advance one of the underlying positions (Head or Tail). + /// + /// \return The next instruction in the must be executed context, or nullptr + /// if none was found. + const Instruction *advance(); + + /// A set to track the visited instructions in order to deal with endless + /// loops and recursion. + VisitedSetTy Visited; + + /// A reference to the explorer that created this iterator. + ExplorerTy &Explorer; + + /// The instruction we are currently exposing to the user. There is always an + /// instruction that we know is executed with the given program point, + /// initially the program point itself. + const Instruction *CurInst; + + friend struct MustBeExecutedContextExplorer; +}; + +/// A "must be executed context" for a given program point PP is the set of +/// instructions, potentially before and after PP, that are executed always when +/// PP is reached. The MustBeExecutedContextExplorer an interface to explore +/// "must be executed contexts" in a module through the use of +/// MustBeExecutedIterator. +/// +/// The explorer exposes "must be executed iterators" that traverse the must be +/// executed context. There is little information sharing between iterators as +/// the expected use case involves few iterators for "far apart" instructions. +/// If that changes, we should consider caching more intermediate results. +struct MustBeExecutedContextExplorer { + + /// In the description of the parameters we use PP to denote a program point + /// for which the must be executed context is explored, or put differently, + /// for which the MustBeExecutedIterator is created. + /// + /// \param ExploreInterBlock Flag to indicate if instructions in blocks + /// other than the parent of PP should be + /// explored. + MustBeExecutedContextExplorer(bool ExploreInterBlock) + : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {} + + /// Clean up the dynamically allocated iterators. + ~MustBeExecutedContextExplorer() { + DeleteContainerSeconds(InstructionIteratorMap); + } + + /// Iterator-based interface. \see MustBeExecutedIterator. + ///{ + using iterator = MustBeExecutedIterator; + using const_iterator = const MustBeExecutedIterator; + + /// Return an iterator to explore the context around \p PP. + iterator &begin(const Instruction *PP) { + auto *&It = InstructionIteratorMap[PP]; + if (!It) + It = new iterator(*this, PP); + return *It; + } + + /// Return an iterator to explore the cached context around \p PP. + const_iterator &begin(const Instruction *PP) const { + return *InstructionIteratorMap.lookup(PP); + } + + /// Return an universal end iterator. + ///{ + iterator &end() { return EndIterator; } + iterator &end(const Instruction *) { return EndIterator; } + + const_iterator &end() const { return EndIterator; } + const_iterator &end(const Instruction *) const { return EndIterator; } + ///} + + /// Return an iterator range to explore the context around \p PP. + llvm::iterator_range range(const Instruction *PP) { + return llvm::make_range(begin(PP), end(PP)); + } + + /// Return an iterator range to explore the cached context around \p PP. + llvm::iterator_range range(const Instruction *PP) const { + return llvm::make_range(begin(PP), end(PP)); + } + ///} + + /// Update interface + ///{ + + /// Insert the instruction \p NewI before the instruction \p PosI in the + /// cached information. + void insertInstructionBefore(const Instruction *NewI, + const Instruction *PosI); + + /// Insert the instruction \p NewI after the instruction \p PosI in the + /// cached information. This is not allowed for the last instruction in a + /// basic block. + void insertInstructionAfter(const Instruction *NewI, const Instruction *PosI); + + /// Remove instruction \p I from the cached information. + void removeInstruction(const Instruction *I); + + ///} + + /// Return the next instruction that is guaranteed to be executed after \p PP. + /// + /// \param It The iterator that is used to traverse the must be + /// executed context. + /// \param PP The program point for which the next instruction + /// that is guaranteed to execute is determined. + const Instruction * + getMustBeExecutedNextInstruction(MustBeExecutedIterator &It, + const Instruction *PP); + + /// Parameter that limit the performed exploration. See the constructor for + /// their meaning. + ///{ + const bool ExploreInterBlock; + ///} + +private: + /// Map from instructions to associated must be executed iterators. + DenseMap + InstructionIteratorMap; + + /// A unique end iterator. + MustBeExecutedIterator EndIterator; +}; + +} // namespace llvm #endif diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp --- a/llvm/lib/Analysis/MustExecute.cpp +++ b/llvm/lib/Analysis/MustExecute.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/MustExecute.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" @@ -21,6 +23,8 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +#define DEBUG_TYPE "must-execute" + const DenseMap & LoopSafetyInfo::getBlockColors() const { return BlockColors; @@ -430,3 +434,208 @@ return false; } + +/// Return true if \p L might be an endless loop. +static bool maybeEndlessLoop(const Loop &L) { + if (L.getHeader()->getParent()->hasFnAttribute(Attribute::WillReturn)) + return false; + // TODO: Actually try to prove it is not. + // TODO: If maybeEndlessLoop is going to be expensive, cache it. + return true; +} + +static bool mayContainIrreducibleControl(const Function &F, + const LoopInfo *LI) { + if (!LI) + return true; + + using RPOTraversal = ReversePostOrderTraversal; + RPOTraversal FuncRPOT(&F); + return containsIrreducibleCFG(FuncRPOT, *LI); +} + +static void getGuaranteedExecutedBlocks( + const Instruction &TI, + SmallVectorImpl &GuaranteedExecutedBlocks, + const DominatorTree *DT, const LoopInfo *LI) { + const BasicBlock *TIBlock = TI.getParent(); + bool NoThrow = TIBlock->getParent()->doesNotThrow(); + const Loop *L = LI ? LI->getLoopFor(TIBlock) : nullptr; + + if (const BasicBlock *SuccBB = getSuccessorInFirstIteration(TI, L, DT, LI)) + GuaranteedExecutedBlocks.push_back(SuccBB); + + // TODO: This might be better off in findForwardJoinPoint. + // TODO: Check no-throw to this block in the loop not for the whole function + if (DT && !maybeEndlessLoop(*L) && NoThrow && TI.getNumSuccessors() == 2 && + L->isLoopLatch(TIBlock)) { + // TI is a latch of a finite loop. If it dominates all exiting blocks, + // the non backedge has to be taken eventually. + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + bool DominatesAllExitingBlocks = + llvm::all_of(ExitingBlocks, [DT, TIBlock](BasicBlock *ExitingBB) { + return DT->dominates(TIBlock, ExitingBB); + }); + if (DominatesAllExitingBlocks) { + if (TI.getSuccessor(1) == L->getHeader()) + GuaranteedExecutedBlocks.push_back(TI.getSuccessor(0)); + else + GuaranteedExecutedBlocks.push_back(TI.getSuccessor(1)); + } + } +} + +/// Return an instruction that is always executed before the function \p F is +/// left, assuming it is left through a return. +static const Instruction *findFunctionExitJoinPoint(const Function *F, + const DominatorTree *DT) { + const BasicBlock *JoinBB = nullptr; + for (const BasicBlock &BB : *F) { + // Skip all but return instructions. + if (!isa(BB.getTerminator())) + continue; + + // The first return instruction found is the initial join point. + if (!JoinBB) { + JoinBB = &BB; + continue; + } + + // When we find more return instructions the nearest common dominator of all + // of them is known to be executed prior to all of them. + if (DT) { + JoinBB = DT->findNearestCommonDominator(JoinBB, &BB); + assert(JoinBB && "Assumed a common dominator!"); + continue; + } + + // If we do no have a dominator tree we still know that *at least* the entry + // block is executed as we assume the function is left through a return. + // TODO: Improve this by using getMustBeExecutedNextInstruction() from this + // point. + return F->getEntryBlock().getTerminator(); + } + + // If we traversed all the blocks and found the nearest common dominator we + // return it. If we did not find a return instruction we return a nullptr but + // we should indicate a problem instead because the function is never left + // through a return. + return JoinBB ? JoinBB->getTerminator() : nullptr; +} + +/// Lookup \p Key in \p Map and return the result, potentially after +/// initializing the optional through \p Fn(\p args). +template +static V getOrCreateCachedOptional(K Key, DenseMap> &Map, + FnTy &&Fn, ArgsTy &&... args) { + Optional &OptVal = Map[Key]; + if (!OptVal.hasValue()) + OptVal = Fn(std::forward(args)...); + return OptVal.getValue(); +} + +const Instruction * +MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction( + MustBeExecutedIterator &It, const Instruction *PP) { + if (!PP) + return PP; + LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n"); + + // If we explore only inside a given basic block we stop at terminators. + if (!ExploreInterBlock && PP->isTerminator()) { + LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n"); + return nullptr; + } + + // The function that contains the current position. + const Function *PPFunc = PP->getFunction(); + + // If we do not traverse the call graph we check if we can make progress in + // the current function. First, check if the instruction is guaranteed to + // transfer execution to the successor. + bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP); + if (!TransfersExecution) + return nullptr; + + // If this is not a terminator we know that there is a single instruction + // after this one that is executed next if control is transfered. If not, + // we can try to go back to a call site we entered earlier. If none exists, we + // do not know any instruction that has to be executd next. + if (!PP->isTerminator()) { + const Instruction *NextPP = PP->getNextNode(); + LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n"); + return NextPP; + } + + // Finally, we have to handle terminators, trivial ones first. + assert(PP->isTerminator() && "Expected a terminator!"); + + // A terminator without a successor is not handled yet. + if (PP->getNumSuccessors() == 0) { + LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n"); + return nullptr; + } + + // A terminator with a single successor, we will continue at the beginning of + // that one. + if (PP->getNumSuccessors() == 1) { + LLVM_DEBUG( + dbgs() << "\tUnconditional terminator, continue with successor\n"); + return &PP->getSuccessor(0)->front(); + } + + LLVM_DEBUG(dbgs() << "\tNo join point found\n"); + return nullptr; +} + +MustBeExecutedIterator::MustBeExecutedIterator( + MustBeExecutedContextExplorer &Explorer, const Instruction *I) + : Explorer(Explorer), CurInst(I) { + reset(I); +} + +void MustBeExecutedIterator::reset(const Instruction *I) { + CurInst = I; + Visited.clear(); + Visited.insert(I); +} + +const Instruction *MustBeExecutedIterator::advance() { + assert(CurInst && "Cannot advance an end iterator!"); + return Explorer.getMustBeExecutedNextInstruction(*this, CurInst); +} + +void MustBeExecutedContextExplorer::insertInstructionBefore( + const Instruction *, const Instruction *) {} + +void MustBeExecutedContextExplorer::insertInstructionAfter( + const Instruction *NewI, const Instruction *PosI) { + bool NewTransfersEx = isGuaranteedToTransferExecutionToSuccessor(NewI); + bool PosTransfersEx = isGuaranteedToTransferExecutionToSuccessor(PosI); + const Instruction *PosINext = PosI->getNextNode(); + for (auto &MapIt : InstructionIteratorMap) { + iterator &It = *MapIt.getSecond(); + if (PosTransfersEx) { + if (It.CurInst != PosI && It.Visited.count(PosI)) { + if (!NewTransfersEx) { + It.reset(MapIt.getFirst()); + continue; + } + It.Visited.insert(NewI); + } + } + } +} + +void MustBeExecutedContextExplorer::removeInstruction(const Instruction *I) { + auto It = InstructionIteratorMap.find(I); + if (It != InstructionIteratorMap.end()) { + delete It->second; + InstructionIteratorMap.erase(It); + } + + for (auto &It : InstructionIteratorMap) + It.getSecond()->Visited.erase(I); +}