diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h
--- a/llvm/include/llvm/Analysis/MustExecute.h
+++ b/llvm/include/llvm/Analysis/MustExecute.h
@@ -7,10 +7,17 @@
 //===----------------------------------------------------------------------===//
 /// \file
 /// Contains a collection of routines for determining if a given instruction is
-/// guaranteed to execute if a given point in control flow is reached.  The most
+/// guaranteed to execute if a given point in control flow is reached. The most
 /// common example is an instruction within a loop being provably executed if we
 /// branch to the header of it's containing loop.
 ///
+/// There are two interfaces available to determine if an instruction is
+/// executed once a given point in the control flow is reached:
+/// 1) A loop-centric one derived from LoopSafetyInfo.
+/// 2) A "must be executed context"-based one implemented in the
+///    MustBeExecutedContextExplorer.
+/// Please refer to the class comments for more information.
+///
 //===----------------------------------------------------------------------===//

 #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
@@ -211,6 +218,243 @@
   virtual ~ICFLoopSafetyInfo() {};
 };

-}
+/// Forward declaration.
+struct MustBeExecutedContextExplorer;
+
+/// Must be executed iterators visit stretches of instructions that are
+/// guaranteed to be executed together, potentially with other instructions
+/// executed in between.
+///
+/// Given the following code, and assuming all statements are single
+/// instructions which transfer execution to the successor (see
+/// isGuaranteedToTransferExecutionToSuccessor), there are two possible
+/// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
+/// and E. If we start at C or D, we will visit all instructions A-E.
+///
+/// \code
+///   A;
+///   B;
+///   if (...) {
+///     C;
+///     D;
+///   }
+///   E;
+/// \endcode
+///
+/// Below is the example extended with instructions F and G. Now we assume F
+/// might not transfer execution to its successor G. As a result we get the
+/// following visit sets:
+///
+/// Start Instruction | Visit Set
+/// A                 | A, B,       E, F
+/// B                 |    B,       E, F
+/// C                 |       C, D, E, F
+/// D                 |          D, E, F
+/// E                 |             E, F
+/// F                 |                F
+/// G                 |                   G
+///
+/// \code
+///   A;
+///   B;
+///   if (...) {
+///     C;
+///     D;
+///   }
+///   E;
+///   F;  // Might not transfer execution to its successor G.
+///   G;
+/// \endcode
+///
+/// Note that the examples show optimal visit sets, not necessarily the ones
+/// derived by the explorer, which depend on the available CFG analyses (see
+/// MustBeExecutedContextExplorer). Also note that, depending on the options,
+/// the visit set can contain instructions from other functions.
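+///
+/// A minimal usage sketch (illustrative only; \c Explorer, the program point
+/// \c PP, and the \c visit callback are assumed to exist and are not part of
+/// this interface):
+///
+/// \code
+///   for (auto It = Explorer.begin(PP), End = Explorer.end(PP); It != End;
+///        ++It)
+///     visit(*It);
+/// \endcode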
+struct MustBeExecutedIterator {
+  /// Type declarations that make this class an input iterator.
+  ///{
+  typedef const Instruction *value_type;
+  typedef std::ptrdiff_t difference_type;
+  typedef const Instruction **pointer;
+  typedef const Instruction *&reference;
+  typedef std::input_iterator_tag iterator_category;
+  ///}
+
+  using ExplorerTy = MustBeExecutedContextExplorer;
+
+  MustBeExecutedIterator(const MustBeExecutedIterator &Other)
+      : Visited(Other.Visited), Explorer(Other.Explorer),
+        CurInst(Other.CurInst) {}
+
+  MustBeExecutedIterator(MustBeExecutedIterator &&Other)
+      : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
+        CurInst(Other.CurInst) {}
+
+  MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
+    if (this != &Other) {
+      std::swap(Visited, Other.Visited);
+      std::swap(CurInst, Other.CurInst);
+    }
+    return *this;
+  }
+
+  ~MustBeExecutedIterator() {}
+
+  /// Pre- and post-increment operators.
+  ///{
+  MustBeExecutedIterator &operator++() {
+    CurInst = advance();
+    return *this;
+  }
+
+  MustBeExecutedIterator operator++(int) {
+    MustBeExecutedIterator tmp(*this);
+    operator++();
+    return tmp;
+  }
+  ///}
+
+  /// Equality and inequality operators. Note that we ignore the history here.
+  ///{
+  bool operator==(const MustBeExecutedIterator &Other) const {
+    return CurInst == Other.CurInst;
+  }
+
+  bool operator!=(const MustBeExecutedIterator &Other) const {
+    return !(*this == Other);
+  }
+  ///}
+
+  /// Return the underlying instruction.
+  const Instruction *&operator*() { return CurInst; }
+  const Instruction *getCurrentInst() const { return CurInst; }
+
+  /// Return true if \p I was encountered by this iterator already.
+  bool count(const Instruction *I) const { return Visited.count(I); }
+
+private:
+  using VisitedSetTy = DenseSet<const Instruction *>;
+
+  /// Private constructor, used by the explorer only.
+  MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
+
+  /// Reset the iterator to its initial state pointing at \p I.
+  void reset(const Instruction *I);
+
+  /// Try to advance the underlying position (CurInst).
+  ///
+  /// \return The next instruction in the must be executed context, or nullptr
+  ///         if none was found.
+  const Instruction *advance();
+
+  /// A set to track the visited instructions in order to deal with endless
+  /// loops and recursion.
+  VisitedSetTy Visited;
+
+  /// A reference to the explorer that created this iterator.
+  ExplorerTy &Explorer;
+
+  /// The instruction we are currently exposing to the user. There is always an
+  /// instruction that we know is executed with the given program point,
+  /// initially the program point itself.
+  const Instruction *CurInst;
+
+  friend struct MustBeExecutedContextExplorer;
+};
+
+/// A "must be executed context" for a given program point PP is the set of
+/// instructions, potentially before and after PP, that are always executed
+/// when PP is reached. The MustBeExecutedContextExplorer is an interface to
+/// explore "must be executed contexts" in a module through the use of
+/// MustBeExecutedIterator.
+///
+/// The explorer exposes "must be executed iterators" that traverse the must be
+/// executed context. There is little information sharing between iterators as
+/// the expected use case involves few iterators for "far apart" instructions.
+/// If that changes, we should consider caching more intermediate results.
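+///
+/// For illustration, a sketch of the intended use, assuming a function \p F
+/// whose entry instruction is the program point of interest:
+///
+/// \code
+///   MustBeExecutedContextExplorer Explorer(/* ExploreInterBlock */ true);
+///   for (const Instruction *I : Explorer.range(&F.getEntryBlock().front())) {
+///     // I is guaranteed to be executed whenever F is entered.
+///   }
+/// \endcode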
+struct MustBeExecutedContextExplorer {
+
+  /// In the description of the parameters we use PP to denote a program point
+  /// for which the must be executed context is explored, or put differently,
+  /// for which the MustBeExecutedIterator is created.
+  ///
+  /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
+  ///                             other than the parent of PP should be
+  ///                             explored.
+  MustBeExecutedContextExplorer(bool ExploreInterBlock)
+      : ExploreInterBlock(ExploreInterBlock), EndIterator(*this, nullptr) {}
+
+  /// Clean up the dynamically allocated iterators.
+  ~MustBeExecutedContextExplorer() {
+    DeleteContainerSeconds(InstructionIteratorMap);
+  }
+
+  /// Iterator-based interface. \see MustBeExecutedIterator.
+  ///{
+  using iterator = MustBeExecutedIterator;
+  using const_iterator = const MustBeExecutedIterator;
+
+  /// Return an iterator to explore the context around \p PP.
+  iterator &begin(const Instruction *PP) {
+    auto *&It = InstructionIteratorMap[PP];
+    if (!It)
+      It = new iterator(*this, PP);
+    return *It;
+  }
+
+  /// Return an iterator to explore the cached context around \p PP.
+  const_iterator &begin(const Instruction *PP) const {
+    return *InstructionIteratorMap.lookup(PP);
+  }
+
+  /// Return a universal end iterator.
+  ///{
+  iterator &end() { return EndIterator; }
+  iterator &end(const Instruction *) { return EndIterator; }
+
+  const_iterator &end() const { return EndIterator; }
+  const_iterator &end(const Instruction *) const { return EndIterator; }
+  ///}
+
+  /// Return an iterator range to explore the context around \p PP.
+  llvm::iterator_range<iterator> range(const Instruction *PP) {
+    return llvm::make_range(begin(PP), end(PP));
+  }
+
+  /// Return an iterator range to explore the cached context around \p PP.
+  llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
+    return llvm::make_range(begin(PP), end(PP));
+  }
+  ///}
+
+  /// Return the next instruction that is guaranteed to be executed after
+  /// \p PP.
+  ///
+  /// \param It     The iterator that is used to traverse the must be
+  ///               executed context.
+  /// \param PP     The program point for which the next instruction
+  ///               that is guaranteed to execute is determined.
+  const Instruction *
+  getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
+                                   const Instruction *PP);
+
+  /// Parameters that limit the performed exploration. See the constructor for
+  /// their meaning.
+  ///{
+  const bool ExploreInterBlock;
+  ///}
+
+private:
+  /// Map from instructions to associated must be executed iterators.
+  DenseMap<const Instruction *, MustBeExecutedIterator *>
+      InstructionIteratorMap;
+
+  /// A unique end iterator.
+  MustBeExecutedIterator EndIterator;
+};
+
+} // namespace llvm

 #endif
diff --git a/llvm/lib/Analysis/MustExecute.cpp b/llvm/lib/Analysis/MustExecute.cpp
--- a/llvm/lib/Analysis/MustExecute.cpp
+++ b/llvm/lib/Analysis/MustExecute.cpp
@@ -7,6 +7,8 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Analysis/MustExecute.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
@@ -21,6 +23,8 @@
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;

+#define DEBUG_TYPE "must-execute"
+
 const DenseMap<BasicBlock *, ColorVector> &
 LoopSafetyInfo::getBlockColors() const {
   return BlockColors;
@@ -430,3 +434,75 @@

   return false;
 }
+
+const Instruction *
+MustBeExecutedContextExplorer::getMustBeExecutedNextInstruction(
+    MustBeExecutedIterator &It, const Instruction *PP) {
+  if (!PP)
+    return PP;
+  LLVM_DEBUG(dbgs() << "Find next instruction for " << *PP << "\n");
+
+  // If we explore only inside a given basic block we stop at terminators.
+  if (!ExploreInterBlock && PP->isTerminator()) {
+    LLVM_DEBUG(dbgs() << "\tReached terminator in intra-block mode, done\n");
+    return nullptr;
+  }
+
+  // If we do not traverse the call graph we check if we can make progress in
+  // the current function. First, check if the instruction is guaranteed to
+  // transfer execution to the successor.
+  bool TransfersExecution = isGuaranteedToTransferExecutionToSuccessor(PP);
+  if (!TransfersExecution)
+    return nullptr;
+
+  // If this is not a terminator we know that there is a single instruction
+  // after this one that is executed next if control is transferred. If not,
+  // we can try to go back to a call site we entered earlier. If none exists, we
+  // do not know any instruction that has to be executed next.
+  if (!PP->isTerminator()) {
+    const Instruction *NextPP = PP->getNextNode();
+    LLVM_DEBUG(dbgs() << "\tIntermediate instruction does transfer control\n");
+    return NextPP;
+  }
+
+  // Finally, we have to handle terminators, trivial ones first.
+  assert(PP->isTerminator() && "Expected a terminator!");
+
+  // A terminator without a successor is not handled yet.
+  if (PP->getNumSuccessors() == 0) {
+    LLVM_DEBUG(dbgs() << "\tUnhandled terminator\n");
+    return nullptr;
+  }
+
+  // For a terminator with a single successor, we will continue at the
+  // beginning of that successor.
+  if (PP->getNumSuccessors() == 1) {
+    LLVM_DEBUG(
+        dbgs() << "\tUnconditional terminator, continue with successor\n");
+    return &PP->getSuccessor(0)->front();
+  }
+
+  LLVM_DEBUG(dbgs() << "\tNo join point found\n");
+  return nullptr;
+}
+
+MustBeExecutedIterator::MustBeExecutedIterator(
+    MustBeExecutedContextExplorer &Explorer, const Instruction *I)
+    : Explorer(Explorer), CurInst(I) {
+  reset(I);
+}
+
+void MustBeExecutedIterator::reset(const Instruction *I) {
+  CurInst = I;
+  Visited.clear();
+  Visited.insert(I);
+}
+
+const Instruction *MustBeExecutedIterator::advance() {
+  assert(CurInst && "Cannot advance an end iterator!");
+  const Instruction *Next =
+      Explorer.getMustBeExecutedNextInstruction(*this, CurInst);
+  if (Next && !Visited.insert(Next).second)
+    Next = nullptr;
+  return Next;
+}
diff --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -47,6 +47,7 @@
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
@@ -566,8 +567,9 @@
 /// This method limits promotion of aggregates to only promote up to three
 /// elements of the aggregate in order to avoid exploding the number of
 /// arguments passed in.
-static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR,
-                                    unsigned MaxElements) {
+static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy,
+                                    AAResults &AAR, unsigned MaxElements,
+                                    MustBeExecutedContextExplorer &Explorer) {
   using GEPIndicesSet = std::set<IndicesVector>;

   // Quick exit for unused arguments
@@ -618,21 +620,20 @@
     return true;
   };

-  // First, iterate the entry block and mark loads of (geps of) arguments as
-  // safe.
-  BasicBlock &EntryBlock = Arg->getParent()->front();
   // Declare this here so we can reuse it
   IndicesVector Indices;
-  for (Instruction &I : EntryBlock)
-    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
-      Value *V = LI->getPointerOperand();
-      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
+
+  // First, iterate over all instructions executed whenever the function is
+  // reached and mark loads of (geps of) arguments as safe.
+  BasicBlock &EntryBlock = Arg->getParent()->front();
+  for (const Instruction *I : Explorer.range(&EntryBlock.front()))
+    if (auto *LI = dyn_cast<LoadInst>(I)) {
+      const Value *V = LI->getPointerOperand();
+      if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
         V = GEP->getPointerOperand();
         if (V == Arg) {
           // This load actually loads (part of) Arg? Check the indices then.
           Indices.reserve(GEP->getNumIndices());
-          for (User::op_iterator II = GEP->idx_begin(), IE = GEP->idx_end();
-               II != IE; ++II)
+          for (auto II = GEP->idx_begin(), IE = GEP->idx_end(); II != IE; ++II)
             if (ConstantInt *CI = dyn_cast<ConstantInt>(*II))
               Indices.push_back(CI->getSExtValue());
             else
@@ -683,7 +684,8 @@
         // TODO: This runs the above loop over and over again for dead GEPs
         // Couldn't we just do increment the UI iterator earlier and erase the
         // use?
-        return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements);
+        return isSafeToPromoteArgument(Arg, ByValTy, AAR, MaxElements,
+                                       Explorer);
       }

       if (!UpdateBaseTy(GEP->getSourceElementType()))
@@ -926,6 +928,9 @@

   AAResults &AAR = AARGetter(*F);

+  // TODO: This should be removed once we deduce "dereferenceable" properly.
+  MustBeExecutedContextExplorer Explorer(/* ExploreInterBlock */ true);
+
   // Check to see which arguments are promotable. If an argument is promotable,
   // add it to ArgsToPromote.
   SmallPtrSet<Argument *, 8> ArgsToPromote;
@@ -1001,7 +1006,7 @@
     // Otherwise, see if we can promote the pointer to its value.
     Type *ByValTy =
         PtrArg->hasByValAttr() ? PtrArg->getParamByValType() : nullptr;
-    if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements))
+    if (isSafeToPromoteArgument(PtrArg, ByValTy, AAR, MaxElements, Explorer))
       ArgsToPromote.insert(PtrArg);
   }
diff --git a/llvm/test/Transforms/ArgumentPromotion/executed_in_entry.ll b/llvm/test/Transforms/ArgumentPromotion/executed_in_entry.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/ArgumentPromotion/executed_in_entry.ll
@@ -0,0 +1,87 @@
+; RUN: opt < %s -argpromotion -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline='basic-aa' -passes=argpromotion -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%struct.S = type { i32, i32 }
+
+; Make sure we do not assume @unknown() is going to return, which would
+; allow us to pre-load %s in @caller. (PR42039)
+define i32 @caller(%struct.S* %s) {
+entry:
+; CHECK: %call = call i32 @local(%struct.S* %s)
+  %call = call i32 @local(%struct.S* %s)
+  ret i32 %call
+}
+
+define internal i32 @local(%struct.S* noalias %s) {
+entry:
+  call void @unknown()
+  %a = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0
+  %0 = load i32, i32* %a, align 4
+  %b = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 1
+  %1 = load i32, i32* %b, align 4
+  %add = add nsw i32 %0, %1
+  ret i32 %add
+}
+
+declare void @unknown() nounwind
+
+
+; Make sure we do assume @unknown_willreturn() is going to return,
+; which allows us to pre-load %s in @caller_willreturn.
+define i32 @caller_willreturn(%struct.S* %s) {
+entry:
+; CHECK: %call = call i32 @local_willreturn(i32 %{{.*}}, i32 %{{.*}})
+  %call = call i32 @local_willreturn(%struct.S* %s)
+  ret i32 %call
+}
+
+define internal i32 @local_willreturn(%struct.S* noalias %s) {
+entry:
+  call void @unknown_willreturn()
+  %g0 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0
+  %l0 = load i32, i32* %g0, align 4
+  %g1 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 1
+  %l1 = load i32, i32* %g1, align 4
+  %add = add nsw i32 %l0, %l1
+  ret i32 %add
+}
+
+declare void @unknown_willreturn() willreturn nounwind
+
+
+; We should promote %s, as it is for sure executed in
+; @caller_must_be_executed, but not %p, as it is not always accessed
+; (completely).
+define i32 @caller_must_be_executed(%struct.S* %s, %struct.S* %p, i1 %c) {
+entry:
+; TODO: %call = call i32 @local_must_be_executed(i32 %{{.*}}, i32 %{{.*}}, %struct.S* %p, i1 %c)
+  %call = call i32 @local_must_be_executed(%struct.S* %s, %struct.S* %p, i1 %c)
+  ret i32 %call
+}
+
+define internal i32 @local_must_be_executed(%struct.S* %s, %struct.S* %p, i1 %c) {
+entry:
+  %sg0 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 0
+  %sv0 = load i32, i32* %sg0, align 4
+  br i1 %c, label %then, label %else
+
+then:
+  %pg0 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 0
+  %pv0 = load i32, i32* %pg0, align 4
+  %add0 = add nsw i32 %sv0, %pv0
+  br label %merge
+
+else:
+  %pg1 = getelementptr inbounds %struct.S, %struct.S* %p, i64 0, i32 1
+  %pv1 = load i32, i32* %pg1, align 4
+  %add1 = add nsw i32 %sv0, %pv1
+  br label %merge
+
+merge:
+  %phi = phi i32 [%add0, %then], [%add1, %else]
+  %sg1 = getelementptr inbounds %struct.S, %struct.S* %s, i64 0, i32 1
+  %sv1 = load i32, i32* %sg1, align 4
+  %add = add nsw i32 %phi, %sv1
+  ret i32 %add
+}