Index: include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- include/llvm/Analysis/BasicAliasAnalysis.h +++ include/llvm/Analysis/BasicAliasAnalysis.h @@ -29,6 +29,7 @@ namespace llvm { class AssumptionCache; class DominatorTree; +struct InvariantInfo; class LoopInfo; /// This is the AA result object for the basic, local, and stateless alias @@ -41,44 +42,46 @@ const DataLayout &DL; AssumptionCache &AC; + InvariantInfo *InvRA; DominatorTree *DT; LoopInfo *LI; public: - BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, - AssumptionCache &AC, DominatorTree *DT = nullptr, - LoopInfo *LI = nullptr) - : AAResultBase(TLI), DL(DL), AC(AC), DT(DT), LI(LI) {} - - BasicAAResult(const BasicAAResult &Arg) - : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI) {} - BasicAAResult(BasicAAResult &&Arg) - : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), DT(Arg.DT), - LI(Arg.LI) {} - - /// Handle invalidation events from the new pass manager. - /// - /// By definition, this result is stateless and so remains valid. - bool invalidate(Function &, const PreservedAnalyses &) { return false; } + BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, + AssumptionCache &AC, InvariantInfo *InvInfo = nullptr, + DominatorTree *DT = nullptr, LoopInfo *LI = nullptr) + : AAResultBase(TLI), DL(DL), AC(AC), InvRA(InvInfo), DT(DT), LI(LI) {} + + BasicAAResult(const BasicAAResult &Arg) + : AAResultBase(Arg), DL(Arg.DL), AC(Arg.AC), InvRA(Arg.InvRA), + DT(Arg.DT), LI(Arg.LI) {} + BasicAAResult(BasicAAResult &&Arg) + : AAResultBase(std::move(Arg)), DL(Arg.DL), AC(Arg.AC), InvRA(Arg.InvRA), + DT(Arg.DT), LI(Arg.LI) {} + + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. + bool invalidate(Function &, const PreservedAnalyses &) { return false; } - AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); - ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); - ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); - /// Chases pointers until we find a (constant global) or not. - bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + /// Chases pointers until we find a (constant global) or not. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); - /// Get the location associated with a pointer argument of a callsite. - ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); - /// Returns the behavior when calling the given call site. - FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); - /// Returns the behavior when calling the given function. For use when the - /// call site is not known. - FunctionModRefBehavior getModRefBehavior(const Function *F); + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. + FunctionModRefBehavior getModRefBehavior(const Function *F); private: // A linear transformation of a Value; this class represents ZExt(SExt(V, Index: include/llvm/Analysis/InvariantInfo.h =================================================================== --- /dev/null +++ include/llvm/Analysis/InvariantInfo.h @@ -0,0 +1,216 @@ +//===-------- llvm/InvariantInfo.h - invariant_start/end info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INVARIANTINFO_H +#define LLVM_ANALYSIS_INVARIANTINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Pass.h" + +namespace llvm { +template +class AnalysisManager; +class DataLayout; +class DominatorTree; +class Instruction; +class IntrinsicInst; +class LoadInst; +class PreservedAnalyses; +struct PostDominatorTreeT; +class Value; + +/// A data structure to (lazilly) collect and process invariant intrinsic calls; +/// necessary for invariant range analysis via \c InvariantRangeAnalysisPass or +/// \c InvariantInfoAnalysis. +struct InvariantInfo { + typedef SmallVector InvListT; + typedef SmallDenseMap InvListMapT; + typedef SmallDenseMap InvListMapMapT; + + private: + /// \brief A mapping of allocated memory pointer values to corresponding + /// invariant_start/end ranges; Keeps track of the functions that the + /// invariant intrinsics are called from, as well as the addresses that the + /// intrinsics are associated with. + InvListMapMapT AllInvariantsMaps; + + /// \brief Keeps track of functions for which invariant intrinsic calls have + /// been collected into /c AllInvariantsMaps. + SmallSet InvariantsComputed; + + /// \brief The dominator and post-dominator trees, the analyses of which are + /// required by invariant range analysis. + DominatorTree *DT; + PostDominatorTreeT *PDT; + + /// \brief Determines whether range analysis is enabled. If not, it will not + /// be computed (cf. \c queriedFromInvariantRange()). This helps keep the + /// effect of the analysis local to specific passes like GVN. + bool RangeAnalysisIsEnabled; + + public: + InvariantInfo() : DT(nullptr), PDT(nullptr), RangeAnalysisIsEnabled(false) {} + + /// \brief Sets the dominator and post-dominator trees. + void init(DominatorTree *DomTree, PostDominatorTreeT *PostDomTree) { + DT = DomTree; + PDT = PostDomTree; + } + + /// \brief Enables invariant range analysis. + void EnableInvariantRangeAnalysis() { RangeAnalysisIsEnabled = true; } + + /// \brief Retrieves the part of the invariants mapping that is associated + /// with the given function. + InvListMapT &getInvariantsMap(const Function &F); + + /// \brief Access the invariants mapping to extract invariant_start/end + /// instructions that are associated with the given address. + InvListT &getInvariants(const Function &F, const Value *Addr); + + /// \brief Adds an entry to the invariants mapping. + void addInvariant(const Function &F, const Value *Addr, + const IntrinsicInst *IStart); + + /// \brief Populates the invariants mapping for the given function. + void populateInfo(const Function &F); + + /// \brief Determines if the given memory location can be modified by the + /// given instruction, based on invariant range analysis. + bool pointsToReadOnlyMemory(const Instruction *I, const MemoryLocation &Loc, + const DataLayout &DL); + + private: + /// \brief Performs invariant range analysis for some given instruction, + /// based on some allocated memory address: + // If the instruction accesses the given address, then this checks whether + /// there is an invariant range (over the address) that the instruction + /// belongs in. + /// NOTE: Range analysis must be enabled for this computation to go through. + bool queriedFromInvariantRange(const Instruction *I, const Value *Addr); + + typedef SmallSet InstListT; + SmallDenseMap InstructionsInInvariantRange; + + /// \brief Determines if the given instruction was previously found to + /// be in an invariant range over the given address; avoids recomputing + /// \c queriedFromInvariantRange(). + /// FIXME: Try to optimize this by keeping track of computed ranges as well, + /// and then checking the relative positions of the instructions... + bool isInInvariantRange(const Value *Addr, const Instruction *I) { + return InstructionsInInvariantRange[Addr].count(I); + } + + /// \brief Indicates that the given instruction has been found to + /// be in an invariant range over the given address. + void markInInvariantRange(const Value *Addr, const Instruction *I) { + InstructionsInInvariantRange[Addr].insert(I); + } + + public: + /// \brief Various helper functionalities for related analysis passes, e.g., + /// \c InvariantInfoAnalysis, \c InvariantInfoPrinterPass, + /// \c InvariantInfoVerifierPass and \c InvariantRangeAnalysisPass (below). + /// @{ + void clear(); + void verify() const; + void verify(const Function &F) const; + void print(raw_ostream &OS) const; + void print(raw_ostream &OS, const Function &F) const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif + /// @} +}; + +/// \brief A function analysis which provides an \c InvariantInfo. +/// +/// This analysis is intended for use with the new pass manager and will vend +/// invariant info for a given function. +class InvariantInfoAnalysis { + static char PassID; + + public: + typedef InvariantInfo Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "InvariantInfoAnalysis"; } + + InvariantInfoAnalysis() {} + InvariantInfoAnalysis(const InvariantInfoAnalysis &Arg) {} + InvariantInfoAnalysis(InvariantInfoAnalysis &&Arg) {} + InvariantInfoAnalysis &operator=(const InvariantInfoAnalysis &RHS) { + return *this; + } + InvariantInfoAnalysis &operator=(InvariantInfoAnalysis &&RHS) { + return *this; + } + + InvariantInfo run(Function &F, AnalysisManager *AM); +}; + +/// \brief Printer pass for the \c InvariantInfoAnalysis results. +class InvariantInfoPrinterPass { + raw_ostream &OS; + + public: + explicit InvariantInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager *AM); + static StringRef name() { return "InvariantInfoPrinterPass"; } +}; + +/// \brief Verifier pass for the \c InvariantInfoAnalysis results. +struct InvariantInfoVerifierPass { + PreservedAnalyses run(Function &F, AnalysisManager *AM); + static StringRef name() { return "InvariantInfoVerifierPass"; } +}; + +/// \brief The -invariant-range-analysis pass. +class InvariantRangeAnalysisPass : public FunctionPass { + InvariantInfo InvInfo; + Function *CurFunc; + + public: + static char ID; // Pass identification + explicit InvariantRangeAnalysisPass(); + ~InvariantRangeAnalysisPass() override; + + InvariantInfo &getInvariantInfo() { return InvInfo; } + const InvariantInfo &getInvariantInfo() const { return InvInfo; } + + bool runOnFunction(Function &F) override; + void releaseMemory() override { InvInfo.clear(); } + void verifyAnalysis() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *) const override; + void dump() const; + bool doFinalization(Module &) override { + verifyAnalysis(); + return false; + } +}; + +/// \brief Check if the given instruction is an invariant_start/end. +bool IsInvariantIntrinsic(const IntrinsicInst *const II); + +} // End llvm namespace + +#endif Index: include/llvm/Analysis/PostDominators.h =================================================================== --- include/llvm/Analysis/PostDominators.h +++ include/llvm/Analysis/PostDominators.h @@ -18,78 +18,94 @@ namespace llvm { +/// Concrete subclass of DominatorTreeBase that is used to compute a +/// normal post-dominator tree. +/// FIXME: Rename to PostDominatorTree +struct PostDominatorTreeT : public DominatorTreeBase { + PostDominatorTreeT() : DominatorTreeBase(true) {} + + typedef DominatorTreeBase Base; + + bool dominates(const Instruction *Def, const Instruction *User) const; + + inline bool dominates(DomTreeNode *A, DomTreeNode *B) const { + return Base::dominates(A, B); + } + + inline bool dominates(const BasicBlock *A, const BasicBlock *B) const { + return Base::dominates(A, B); + } +}; + /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used to /// compute the post-dominator tree. /// +/// FIXME: Rename to PostDominatorTreeWrapperPass, spliting PostDominatorTree +/// properties out of here (into above PostDominatorTreeT)... struct PostDominatorTree : public FunctionPass { + private: + PostDominatorTreeT PDT; + + public: static char ID; // Pass identification, replacement for typeid - DominatorTreeBase* DT; PostDominatorTree() : FunctionPass(ID) { initializePostDominatorTreePass(*PassRegistry::getPassRegistry()); - DT = new DominatorTreeBase(true); } ~PostDominatorTree() override; + PostDominatorTreeT &getPostDomTree() { return PDT; } + const PostDominatorTreeT &getPostDomTree() const { return PDT; } + bool runOnFunction(Function &F) override; void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); } + void print(raw_ostream &OS, const Module *) const override; + + /// FIXME: Remove the following methods inline const std::vector &getRoots() const { - return DT->getRoots(); + return PDT.getRoots(); } - inline DomTreeNode *getRootNode() const { - return DT->getRootNode(); - } + inline DomTreeNode *getRootNode() { return PDT.getRootNode(); } inline DomTreeNode *operator[](BasicBlock *BB) const { - return DT->getNode(BB); + return PDT.getNode(BB); } - inline DomTreeNode *getNode(BasicBlock *BB) const { - return DT->getNode(BB); - } + inline DomTreeNode *getNode(BasicBlock *BB) const { return PDT.getNode(BB); } inline bool dominates(DomTreeNode* A, DomTreeNode* B) const { - return DT->dominates(A, B); + return PDT.dominates(A, B); } inline bool dominates(const BasicBlock* A, const BasicBlock* B) const { - return DT->dominates(A, B); + return PDT.dominates(A, B); } inline bool properlyDominates(const DomTreeNode* A, DomTreeNode* B) const { - return DT->properlyDominates(A, B); + return PDT.properlyDominates(A, B); } inline bool properlyDominates(BasicBlock* A, BasicBlock* B) const { - return DT->properlyDominates(A, B); + return PDT.properlyDominates(A, B); } inline BasicBlock *findNearestCommonDominator(BasicBlock *A, BasicBlock *B) { - return DT->findNearestCommonDominator(A, B); - } - - inline const BasicBlock *findNearestCommonDominator(const BasicBlock *A, - const BasicBlock *B) { - return DT->findNearestCommonDominator(A, B); + return PDT.findNearestCommonDominator(A, B); } /// Get all nodes post-dominated by R, including R itself. void getDescendants(BasicBlock *R, SmallVectorImpl &Result) const { - DT->getDescendants(R, Result); + PDT.getDescendants(R, Result); } - void releaseMemory() override { - DT->releaseMemory(); - } - - void print(raw_ostream &OS, const Module*) const override; + virtual void releaseMemory() { PDT.releaseMemory(); } }; FunctionPass* createPostDomTree(); @@ -112,6 +128,27 @@ } }; +/// \brief Analysis pass which computes a \c PostDominatorTree. +class PostDominatorTreeAnalysis { + public: + /// \brief Provide the result typedef for this analysis pass. + typedef PostDominatorTreeT Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Run the analysis pass over a function and produce a dominator tree. + PostDominatorTreeT run(Function &F); + + /// \brief Provide access to a name for this pass for debugging purposes. + static StringRef name() { return "PostDominatorTreeAnalysis"; } + + private: + static char PassID; +}; + +// FIXME: Add printer and verifier passes. + } // End llvm namespace #endif Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -150,6 +150,7 @@ void initializeInstNamerPass(PassRegistry&); void initializeInternalizePassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); +void initializeInvariantRangeAnalysisPassPass(PassRegistry &); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); void initializeLICMPass(PassRegistry&); Index: lib/Analysis/Analysis.cpp =================================================================== --- lib/Analysis/Analysis.cpp +++ lib/Analysis/Analysis.cpp @@ -52,6 +52,7 @@ initializeIVUsersPass(Registry); initializeInstCountPass(Registry); initializeIntervalPartitionPass(Registry); + initializeInvariantRangeAnalysisPassPass(Registry); initializeLazyValueInfoPass(Registry); initializeLintPass(Registry); initializeLoopInfoWrapperPassPass(Registry); Index: lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- lib/Analysis/BasicAliasAnalysis.cpp +++ lib/Analysis/BasicAliasAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" @@ -673,6 +674,10 @@ return Alias; } +static bool isInvariantIntrinsic(ImmutableCallSite CS) { + return IsInvariantIntrinsic(dyn_cast(CS.getInstruction())); +} + /// Checks to see if the specified callsite can clobber the specified memory /// object. /// @@ -734,8 +739,22 @@ if (isAssumeIntrinsic(CS)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. + if (isInvariantIntrinsic(CS)) return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. - return AAResultBase::getModRefInfo(CS, Loc); + ModRefInfo Mask = AAResultBase::getModRefInfo(CS, Loc); + + // If available, use invariant range analysis to determine if this call could + // modify the memory location. + // NOTE: Only call instructions for now. + // FIXME: Allow all call sites. + if (const CallInst *CI = dyn_cast(CS.getInstruction())) { + if ((Mask & MRI_Mod) && InvRA && InvRA->pointsToReadOnlyMemory(CI, Loc, DL)) + Mask = ModRefInfo(Mask & ~MRI_Mod); + } + + return Mask; } ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, @@ -746,6 +765,10 @@ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. + if (isInvariantIntrinsic(CS1) || isInvariantIntrinsic(CS2)) + return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS1, CS2); } @@ -1566,6 +1589,7 @@ return BasicAAResult(F.getParent()->getDataLayout(), AM->getResult(F), AM->getResult(F), + &AM->getResult(F), AM->getCachedResult(F), AM->getCachedResult(F)); } @@ -1581,6 +1605,7 @@ "Basic Alias Analysis (stateless AA impl)", true, true) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(InvariantRangeAnalysisPass) INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", "Basic Alias Analysis (stateless AA impl)", true, true) @@ -1591,13 +1616,14 @@ bool BasicAAWrapperPass::runOnFunction(Function &F) { auto &ACT = getAnalysis(); auto &TLIWP = getAnalysis(); + auto &IRAM = getAnalysis(); auto *DTWP = getAnalysisIfAvailable(); auto *LIWP = getAnalysisIfAvailable(); - Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), - ACT.getAssumptionCache(F), - DTWP ? &DTWP->getDomTree() : nullptr, - LIWP ? &LIWP->getLoopInfo() : nullptr)); + Result.reset(new BasicAAResult( + F.getParent()->getDataLayout(), TLIWP.getTLI(), ACT.getAssumptionCache(F), + &IRAM.getInvariantInfo(), DTWP ? &DTWP->getDomTree() : nullptr, + LIWP ? &LIWP->getLoopInfo() : nullptr)); return false; } @@ -1606,11 +1632,15 @@ AU.setPreservesAll(); AU.addRequired(); AU.addRequired(); + AU.addRequired(); } BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { + // TODO: Add and use createInvariantRangeAnalysisPass()? + auto *InvRA = P.getAnalysisIfAvailable(); return BasicAAResult( F.getParent()->getDataLayout(), P.getAnalysis().getTLI(), - P.getAnalysis().getAssumptionCache(F)); + P.getAnalysis().getAssumptionCache(F), + InvRA ? &InvRA->getInvariantInfo() : nullptr); } Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -33,6 +33,7 @@ InstructionSimplify.cpp Interval.cpp IntervalPartition.cpp + InvariantInfo.cpp IteratedDominanceFrontier.cpp LazyCallGraph.cpp LazyValueInfo.cpp Index: lib/Analysis/InvariantInfo.cpp =================================================================== --- /dev/null +++ lib/Analysis/InvariantInfo.cpp @@ -0,0 +1,390 @@ +//===---------- InvariantInfo.cpp - invariant_start/end info --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InvariantInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +using namespace llvm; + +// -stats data: +#define DEBUG_TYPE "invariant_info" +STATISTIC(NumInvStart, "Number of invariant_start instructions"); +STATISTIC(NumInvEnd, "Number of invariant_end instructions"); +STATISTIC(NumInvRangeReq, "Number of invariant range analysis requested"); +STATISTIC(NumInvRangeComp, "Number of invariant range analysis computed"); + +InvariantInfo::InvListMapT &InvariantInfo::getInvariantsMap(const Function &F) { + if (!InvariantsComputed.count(&F)) populateInfo(F); + return AllInvariantsMaps[&F]; +} + +InvariantInfo::InvListT &InvariantInfo::getInvariants(const Function &F, + const Value *Addr) { + assert(Addr && "Address must be nonnull."); + + // Retrieve the value from the markers map. + return getInvariantsMap(F)[Addr]; +} + +void InvariantInfo::addInvariant(const Function &F, const Value *Addr, + const IntrinsicInst *II) { + assert(Addr && II && "Value must be nonnull."); + + // Only mark either non-constant global variables or alloca instructions. + if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) return; + } else if (!isa(Addr)) + return; + + InvListT &IIs = getInvariantsMap(F)[Addr]; + IIs.push_back(II); + + // Update -stats data + ++NumInvStart; + NumInvEnd += II->getNumUses(); +} + +void InvariantInfo::populateInfo(const Function &F) { + if (InvariantsComputed.count(&F)) return; + + // Mark the map entry as computed. + InvariantsComputed.insert(&F); + + assert(getInvariantsMap(F).empty() && + "Already have invariant info when scanning!"); + + // Go through all instructions in all blocks, add all invariant_start calls + // to this map. There is no need to explicitly collect invariant_end calls + // because they are uses of the collected invariant_start calls. + for (const BasicBlock &B : F) { + for (const Instruction &I : B) { + if (const IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + Value *Addr = II->getArgOperand(1)->stripPointerCasts(); + addInvariant(F, Addr, II); + } + } + } + } +} + +/// \brief An instruction cannot modify memory if the instruction is in an +/// invariant range over the memory's pointer value. +/// This implementation is modeled after \c pointsToConstantMemory(), with +/// OrLocal == false, and uses \c queriedFromInvariantRange() to check if +/// the given instruction modify the given memory. +/// In practice, this function should be called immediately after +/// \c pointsToConstantMemory() (with OrLocal == false) returns false. +bool InvariantInfo::pointsToReadOnlyMemory(const Instruction *I, + const MemoryLocation &Loc, + const DataLayout &DL) { + // Proceed only if invariant range analysis is enabled. Otherwise, + // this just repeats the same process from pointsToConstantMemory(). + if (!RangeAnalysisIsEnabled) return false; + + SmallPtrSet Visited; + unsigned MaxLookup = 8; + SmallVector Worklist; + Worklist.push_back(Loc.Ptr); + do { + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); + if (!Visited.insert(V).second) return false; + + if (queriedFromInvariantRange(I, V)) continue; + + if (const GlobalVariable *GV = dyn_cast(V)) { + if (GV->isConstant()) continue; + return false; + } + + // If both select values point to readonly memory, then so does the select. + if (const SelectInst *SI = dyn_cast(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to readonly memory, then so + // does the phi. + if (const PHINode *PN = dyn_cast(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) return false; + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); + continue; + } + + // Otherwise be conservative. + return false; + + } while (!Worklist.empty() && --MaxLookup); + + return Worklist.empty(); +} + +/// \brief Match the instruction accessing the given address against +/// invariant_start calls in the map. Then, traverse the calls to determine +/// whether the intruction is dominated by an invariant_start such that either: +/// * the invariant_start has no associated invariant_end, or +/// * an associated invariant_end post-dominates the instruction. +/// Returns true if such an invariant_start is found. +/// NOTE: Invariant_start/end calls nested within other invariant ranges +/// are considered redundant in this analysis. +/// TODO: For global variables, also consider invariant info from global +/// constructors. +/// NOTE: The invariants mapping only processes non-constant global variables +/// or alloca instructions. So, this function returns false for those +/// uncovered kinds of memory pointer values. +bool InvariantInfo::queriedFromInvariantRange(const Instruction *I, + const Value *Addr) { + assert(RangeAnalysisIsEnabled && "Invariant range analysis must be enabled."); + assert(I && Addr && "No analysis without an instruction or an address."); + assert(DT && PDT && + "Both the dominator and the post-dominator tree" + "analyzes are needed for this analysis."); + + // Not designed for constant globals. + if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) return false; + } + + // Also not designed for non-alloca instructions. + if (!isa(Addr)) return false; + + // Update -stats data + ++NumInvRangeReq; + + // Avoid recomputing this range analysis. + if (isInInvariantRange(Addr, I)) return true; + + // Update -stats data + ++NumInvRangeComp; + + const Function *F = I->getFunction(); + InvListT &IIs = getInvariants(*F, Addr); + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const IntrinsicInst *IStart = *InvsIter; + + // If this invariant_start dominates the instruction, then check if any + // of its associated invariant_ends post-dominates the instruction. + // Otherwise, skip to the next invariant_start. + if (!DT->dominates(IStart, I)) continue; + + for (auto *U : IStart->users()) { + const IntrinsicInst *IEnd = cast(U); + if (PDT->dominates(IEnd, I)) { + markInInvariantRange(Addr, I); + return true; + } + } + + // If this invariant_start has no user, then this range is invariant + // throughout the end of the lifetime of the underlying object. + if (!IStart->getNumUses()) { + markInInvariantRange(Addr, I); + return true; + } + } + + // Otherwise, the instruction does not belong to an invariant range. + return false; +} + +void InvariantInfo::clear() { + AllInvariantsMaps.shrink_and_clear(); + InvariantsComputed.clear(); + DT = nullptr; + PDT = nullptr; + RangeAnalysisIsEnabled = false; + InstructionsInInvariantRange.shrink_and_clear(); +} + +void InvariantInfo::verify() const { + for (auto InvsMapsIter = AllInvariantsMaps.begin(), + InvsMapsEnd = AllInvariantsMaps.end(); + InvsMapsIter != InvsMapsEnd; ++InvsMapsIter) { + const Function &F = *(InvsMapsIter->first); + verify(F); + } +} + +void InvariantInfo::verify(const Function &F) const { + InvListMapT &InvsMap = + const_cast(this)->AllInvariantsMaps[&F]; + + for (auto InvsMapIter = InvsMap.begin(), InvsMapEnd = InvsMap.end(); + InvsMapIter != InvsMapEnd; ++InvsMapIter) { + auto *Addr = InvsMapIter->first; + InvListT &IIs = InvsMapIter->second; + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const IntrinsicInst *II = *InvsIter; + + assert(II->getIntrinsicID() == Intrinsic::invariant_start && + "Only invariant_start calls are collected."); + assert(Addr == II->getArgOperand(1)->stripPointerCasts() && + "Address mismatch in invariant_start."); + + const GlobalVariable *GV = dyn_cast(Addr); + assert((isa(Addr) || (GV && !GV->isConstant())) && + "A matching entry in the map must correspond to " + "a non-constant global variable or an alloca instruction."); + + if (isa(Addr)) + assert(II->getFunction() == cast(Addr)->getFunction() && + "Invariant_start calls on a given alloca instruction must have" + "the same parent function as the alloca instruction."); + + for (auto *U : II->users()) { + const IntrinsicInst *IEnd = cast(U); + + assert(IEnd->getIntrinsicID() == Intrinsic::invariant_end && + "Only invariant_end calls are uses of invariant_start calls."); + assert(II == IEnd->getArgOperand(0)->stripPointerCasts() && + "invariant_start/end."); + assert(IEnd->getArgOperand(2)->stripPointerCasts() == + II->getArgOperand(1)->stripPointerCasts() && + "Address mismatch in invariant_end."); + + if (isa(Addr)) + assert(II->getFunction() == IEnd->getFunction() && + "Invariant_end calls on a given alloca instruction must have" + "the same parent function as its invariant_start."); + } + } + } +} + +void InvariantInfo::print(raw_ostream &OS) const { + for (auto InvsMapsIter = AllInvariantsMaps.begin(), + InvsMapsEnd = AllInvariantsMaps.end(); + InvsMapsIter != InvsMapsEnd; ++InvsMapsIter) { + const Function &F = *(InvsMapsIter->first); + print(OS, F); + } +} + +void InvariantInfo::print(raw_ostream &OS, const Function &F) const { + OS << "Invariant Info for function: " << F.getName() << "\n"; + InvListMapT &InvsMap = const_cast(this)->getInvariantsMap(F); + + for (auto InvsMapIter = InvsMap.begin(), InvsMapEnd = InvsMap.end(); + InvsMapIter != InvsMapEnd; ++InvsMapIter) { + InvListT &IIs = InvsMapIter->second; + OS << *(InvsMapIter->first) << " : \n"; + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const IntrinsicInst *II = *InvsIter; + OS << " invariant_start in block: " << II->getParent()->getName() + << "\n"; + + for (auto *U : II->users()) { + const IntrinsicInst *IEnd = cast(U); + OS << " invariant_end in block: " << IEnd->getParent()->getName() + << "\n"; + } + } + } + + OS << "\n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void InvariantInfo::dump() const { print(dbgs()); } +#endif + +char InvariantInfoAnalysis::PassID; + +InvariantInfo InvariantInfoAnalysis::run(Function &F, + AnalysisManager *AM) { + InvariantInfo InvInfo; + // InvInfo.init(&AM->getResult(F), + // &AM->getResult(F)); + return InvInfo; +} + +PreservedAnalyses InvariantInfoPrinterPass::run(Function &F, + FunctionAnalysisManager *AM) { + InvariantInfo &InvInfo = AM->getResult(F); + InvInfo.print(OS, F); + return PreservedAnalyses::all(); +} + +PreservedAnalyses InvariantInfoVerifierPass::run(Function &F, + FunctionAnalysisManager *AM) { + InvariantInfo &InvInfo = AM->getResult(F); + InvInfo.verify(F); + return PreservedAnalyses::all(); +} + +InvariantRangeAnalysisPass::InvariantRangeAnalysisPass() + : FunctionPass(ID), CurFunc(nullptr) { + initializeInvariantRangeAnalysisPassPass(*PassRegistry::getPassRegistry()); +} + +InvariantRangeAnalysisPass::~InvariantRangeAnalysisPass() {} + +bool InvariantRangeAnalysisPass::runOnFunction(Function &F) { + InvInfo.init(&getAnalysis().getDomTree(), + &getAnalysis().getPostDomTree()); + CurFunc = &F; + return false; +} + +void InvariantRangeAnalysisPass::verifyAnalysis() const { InvInfo.verify(); } + +void InvariantRangeAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); +} + +/// \brief If a Module object is provided, then print the current content of the +/// invariants map. Otherwise, print the mappings specific to the current +/// function, recomputing the mappings as necessary. +void InvariantRangeAnalysisPass::print(raw_ostream &OS, const Module *M) const { + if (!M) + InvInfo.print(OS); + else if (CurFunc) + InvInfo.print(OS, *CurFunc); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void InvariantRangeAnalysisPass::dump() const { InvInfo.dump(); } +#endif + +char InvariantRangeAnalysisPass::ID = 0; + +INITIALIZE_PASS_BEGIN(InvariantRangeAnalysisPass, "invariant-range-analysis", + "Invariant_start/end analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(InvariantRangeAnalysisPass, "invariant-range-analysis", + "Invariant_start/end analysis", false, true) + +bool llvm::IsInvariantIntrinsic(const IntrinsicInst *II) { + return II && (II->getIntrinsicID() == Intrinsic::invariant_start || + II->getIntrinsicID() == Intrinsic::invariant_end); +} Index: lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- lib/Analysis/MemoryDependenceAnalysis.cpp +++ lib/Analysis/MemoryDependenceAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" @@ -504,10 +505,14 @@ while (ScanIt != BB->begin()) { Instruction *Inst = &*--ScanIt; - if (IntrinsicInst *II = dyn_cast(Inst)) + if (IntrinsicInst *II = dyn_cast(Inst)) { // Debug intrinsics don't (and can't) cause dependencies. if (isa(II)) continue; + // Same for invariant intrinsics + if (IsInvariantIntrinsic(II)) continue; + } + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; Index: lib/Analysis/PostDominators.cpp =================================================================== --- lib/Analysis/PostDominators.cpp +++ lib/Analysis/PostDominators.cpp @@ -26,25 +26,94 @@ // PostDominatorTree Implementation //===----------------------------------------------------------------------===// +// dominates - Return true if Def dominates a use in User. This performs +// the special checks necessary if Def and User are in the same basic block. +// Note that Def doesn't dominate a use in Def itself! +bool PostDominatorTreeT::dominates(const Instruction *Def, + const Instruction *User) const { + const BasicBlock *UseBB = User->getParent(); + const BasicBlock *DefBB = Def->getParent(); + + // Any unreachable use is post-dominated, even if Def == User. + // NOTE: isReachableFromEntry() here means "can reach exit from ...". + if (!isReachableFromEntry(getNode(const_cast(UseBB)))) + return true; + + // Unreachable definitions don't post-dominate anything. + // NOTE: isReachableFromEntry() here means "can reach exit from ...". + if (!isReachableFromEntry(getNode(const_cast(DefBB)))) + return false; + + // An instruction doesn't dominate a use in itself. + if (Def == User) return false; + + // A PHI node post-dominates an instruction if every possible use in DefBB + // post-dominates the instruction. + // TODO: Special case for invoke instructions: + // Allow invoke instructions to be post-dominated by instructions in + // their normal destination path to allow further optimizations from + // invariant range analysis such as the elimination of the second load + // instruction in + // + // + // try { ... + // @llvm.invariant.start(..., bitcast i32* %i to i8*) + // load %i + // invoke(...) + // load %i + // ... + // @llvm.invariant.end(..., bitcast i32* %i to i8*) + // } catch (...) { ... } + // + // + if (isa(Def)) { + if (DefBB == UseBB) return false; + return dominates(DefBB, UseBB); + } + + if (DefBB != UseBB) return dominates(DefBB, UseBB); + + // Loop through the basic block, in reverse order, until we find Def or User. + BasicBlock::const_reverse_iterator I = DefBB->rbegin(); + for (; &*I != Def && &*I != User; ++I) /*empty*/; + + return &*I == Def; +} + char PostDominatorTree::ID = 0; INITIALIZE_PASS(PostDominatorTree, "postdomtree", "Post-Dominator Tree Construction", true, true) +PostDominatorTree::~PostDominatorTree() {} + bool PostDominatorTree::runOnFunction(Function &F) { - DT->recalculate(F); + PDT.recalculate(F); return false; } -PostDominatorTree::~PostDominatorTree() { - delete DT; -} - void PostDominatorTree::print(raw_ostream &OS, const Module *) const { - DT->print(OS); + PDT.print(OS); } - FunctionPass* llvm::createPostDomTree() { return new PostDominatorTree(); } +//===----------------------------------------------------------------------===// +// PostDominatorTreeAnalysis and related pass implementations +//===----------------------------------------------------------------------===// +// +// This implements the PostDominatorTreeAnalysis which is used with the new pass +// manager. It also implements some methods from utility passes. +// +// FIXME: Complete this (printer, verifier, etc...). +// +//===----------------------------------------------------------------------===// + +PostDominatorTreeT PostDominatorTreeAnalysis::run(Function &F) { + PostDominatorTreeT PDT; + PDT.recalculate(F); + return PDT; +} + +char PostDominatorTreeAnalysis::PassID; Index: lib/Passes/PassBuilder.cpp =================================================================== --- lib/Passes/PassBuilder.cpp +++ lib/Passes/PassBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/Passes/PassBuilder.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/ScalarEvolution.h" Index: lib/Passes/PassRegistry.def =================================================================== --- lib/Passes/PassRegistry.def +++ lib/Passes/PassRegistry.def @@ -53,6 +53,7 @@ #endif FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) +FUNCTION_ANALYSIS("invariant-info", InvariantInfoAnalysis()) FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) @@ -73,10 +74,12 @@ FUNCTION_PASS("print", PrintFunctionPass(dbgs())) FUNCTION_PASS("print", AssumptionPrinterPass(dbgs())) FUNCTION_PASS("print", DominatorTreePrinterPass(dbgs())) +FUNCTION_PASS("print", InvariantInfoPrinterPass(dbgs())) FUNCTION_PASS("print", LoopPrinterPass(dbgs())) FUNCTION_PASS("print", ScalarEvolutionPrinterPass(dbgs())) FUNCTION_PASS("simplify-cfg", SimplifyCFGPass()) FUNCTION_PASS("sroa", SROA()) FUNCTION_PASS("verify", VerifierPass()) FUNCTION_PASS("verify", DominatorTreeVerifierPass()) +FUNCTION_PASS("verify", InvariantInfoVerifierPass()) #undef FUNCTION_PASS Index: lib/Transforms/Scalar/GVN.cpp =================================================================== --- lib/Transforms/Scalar/GVN.cpp +++ lib/Transforms/Scalar/GVN.cpp @@ -30,6 +30,7 @@ #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -696,8 +697,10 @@ AU.addRequired(); AU.addRequired(); AU.addRequired(); - if (!NoLoads) + if (!NoLoads) { AU.addRequired(); + AU.addRequired(); + } AU.addRequired(); AU.addPreserved(); @@ -748,6 +751,7 @@ INITIALIZE_PASS_BEGIN(GVN, "gvn", "Global Value Numbering", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) +INITIALIZE_PASS_DEPENDENCY(InvariantRangeAnalysisPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) @@ -2426,8 +2430,13 @@ if (skipOptnoneFunction(F)) return false; - if (!NoLoads) + if (!NoLoads) { MD = &getAnalysis(); + + // Enable invariant range analysis here to keep its effect local to GVN. + auto &InvRA = getAnalysis(); + InvRA.getInvariantInfo().EnableInvariantRangeAnalysis(); + } DT = &getAnalysis().getDomTree(); AC = &getAnalysis().getAssumptionCache(F); TLI = &getAnalysis().getTLI(); Index: lib/Transforms/Scalar/LoopIdiomRecognize.cpp =================================================================== --- lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -41,6 +41,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpander.h" @@ -102,6 +103,7 @@ AU.addPreserved(); AU.addRequired(); AU.addRequired(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); } @@ -147,6 +149,8 @@ } // End anonymous namespace. +// NOTE: \c InvariantRangeAnalysisPass is needed here because it is required by +// \c BasicAAWrapperPass. char LoopIdiomRecognize::ID = 0; INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms", false, false) @@ -156,6 +160,7 @@ INITIALIZE_PASS_DEPENDENCY(LCSSA) INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(InvariantRangeAnalysisPass) INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) Index: test/Analysis/InvariantInfo/gvn-basic.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/gvn-basic.ll @@ -0,0 +1,128 @@ +; RUN: opt < %s -S -gvn 2>&1 | FileCheck %s +; RUN: opt < %s -gvn -stats 2>&1 | FileCheck %s -check-prefix=STATS + +; Tests elementary load elimination using call instructions as potential +; clobbers. Also checks statistical data generation. +; NOTE: "-gvn" expands to +; "-targetlibinfo -tti -assumption-cache-tracker -domtree " +; "-postdomtree -invariant-range-analysis -basicaa -aa -memdep " +; "-gvn -verify" + +; STATS: 2 gvn - Number of blocks merged +; STATS: 2 gvn - Number of equalities propagated +; STATS: 6 gvn - Number of instructions deleted +; STATS: 6 gvn - Number of loads deleted +; STATS: 12 invariant_info - Number of invariant range analysis computed +; STATS: 14 invariant_info - Number of invariant range analysis requested +; STATS: 4 invariant_info - Number of invariant_end instructions +; STATS: 4 invariant_info - Number of invariant_start instructions +; STATS: 1 memdep - Number of block queries that were completely cached +; STATS: 1 memdep - Number of uncached non-local ptr responses + +; A simple single range. +define void @simple_range() { +; CHECK: define void @simple_range() +entry: + %i = alloca i32 + call void @foo(i32* %i) ; #1 + + %bi = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %i ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i) ; #2 + %ld2 = load i32, i32* %i ; Not clobbered by #2; Merged into %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i) ; #3 + + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %i ; Not clobbered by #3, nor #2; Merged into %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i) ; #4 + %ld4 = load i32, i32* %i ; Clobbered by #4; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. +define void @multi_branch_ranges() { +; CHECK: define void @multi_branch_ranges() +entry: + %i = alloca i32 + call void @foo(i32* %i) ; #1 + + %bi = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #2 + br label %next + +next: + %ld1 = load i32, i32* %i ; Not clobbered by #2; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld2 = load i32, i32* %i ; Merged into %2. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld2) + + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #3 + %ld3 = load i32, i32* %i ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld1, %ld3 + br i1 %cond, label %same, label %end + +same: + call void @foo(i32* %i) ; #4 + %ld4 = load i32, i32* %i ; Not clobbered by #4; Merged into %ld3 == %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %bi) ; Redundant -- since nested within another invariant range. + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #5 + %ld5 = load i32, i32* %i ; Not clobbered by #5; Merged into %ld3 == %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %bi) ; invariant_start %inv2 ends here. + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #6 + %ld6 = load i32, i32* %i ; Clobbered by #6; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #7 + %ld7 = load i32, i32* %i ; Not clobbered by #7; Merged into %ld6. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld7) + br label %end + +end: + call void @foo(i32* %i) ; #8 + %ld8 = load i32, i32* %i ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/gvn.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/gvn.ll @@ -0,0 +1,158 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + +; Tests elementary load elimination using instructions other than call +; instructions as potential clobbers. + +; select instructions: +; Loading from a select instruction is clobbered by any instruction that +; clobbers at least one of the select values. It is not clobbered by +; instructions that do not clobber any of the select values. +define void @select() { +; CHECK: define void @select() +entry: + %i = alloca i32 + %j = alloca i32 + %cond = call i1 @condition(); + %ij = select i1 %cond, i32* %i, i32* %j + call void @foo(i32* %i, i32* %j) ; #1 + + %bi = bitcast i32* %i to i8* + %invi = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %ij ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i, i32* %j) ; #2 + %ld2 = load i32, i32* %ij ; Clobbered by #2; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i, i32* %j) ; #3 + + %bj = bitcast i32* %j to i8* + %invj = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %ij ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i, i32* %j) ; #4 + %ld4 = load i32, i32* %ij ; Not clobbered by #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + call void @foo(i32* %i, i32* %j) ; #5 + + call void @llvm.invariant.end({}* %invj, i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld5 = load i32, i32* %ij ; Not clobbered by #5, nor #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + call void @foo(i32* %i, i32* %j) ; #6 + %ld6 = load i32, i32* %ij ; Clobbered by #6; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + call void @foo(i32* %i, i32* %j) ; #7 + + call void @llvm.invariant.end({}* %invi, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld7 = load i32, i32* %ij ; Clobbered by #7; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld7) + call void @foo(i32* %i, i32* %j) ; #8 + %ld8 = load i32, i32* %ij ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + + ret void +} + +; PHI nodes: +; Loading from a phi node is clobbered by any instruction that clobbers +; at least one of the incoming values. It is not clobbered by instructions +; that do not clobber any of the incoming values. +define void @phi_node() { +; CHECK: define void @phi_node() +entry: + %i = alloca i32 + %j = alloca i32 + %cond = call i1 @condition(); + br i1 %cond, label %iside, label %jside + +iside: ; Some fillers... + call void @foo(i32* %i, i32* %j) + br label %both + +jside: ; Some fillers... + call void @foo(i32* %i, i32* %j) + %ldj = load i32, i32* %j + call void @bar(i32 %ldj) + call void @foo(i32* %i, i32* %j) + br label %both + +both: ; The rest is similar to select() above. + %ij = phi i32* [ %i, %iside ], [ %j, %jside ] + call void @foo(i32* %i, i32* %j) ; #1 + + %bi = bitcast i32* %i to i8* + %invi = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %ij ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i, i32* %j) ; #2 + %ld2 = load i32, i32* %ij ; Clobbered by #2; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i, i32* %j) ; #3 + + %bj = bitcast i32* %j to i8* + %invj = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %ij ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i, i32* %j) ; #4 + %ld4 = load i32, i32* %ij ; Not clobbered by #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + call void @foo(i32* %i, i32* %j) ; #5 + + call void @llvm.invariant.end({}* %invj, i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld5 = load i32, i32* %ij ; Not clobbered by #5, nor #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + call void @foo(i32* %i, i32* %j) ; #6 + %ld6 = load i32, i32* %ij ; Clobbered by #6; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + call void @foo(i32* %i, i32* %j) ; #7 + + call void @llvm.invariant.end({}* %invi, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld7 = load i32, i32* %ij ; Clobbered by #7; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld7) + call void @foo(i32* %i, i32* %j) ; #8 + %ld8 = load i32, i32* %ij ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + + ret void +} + +; TODO: Test other instructions that can modify memory. +; Examples: +; define void @invoke() { ... } +; define void @store() { ... } +; define void @vaarg() { ... } +; define void @catchpad() { ... } +; define void @catchreturn() { ... } +; ... +; define void @globalvar() { ... } +; ... + +declare i1 @condition() +declare void @bar(i32) readonly +declare void @foo(i32*, i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/print.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/print.ll @@ -0,0 +1,100 @@ +; RUN: opt < %s -invariant-range-analysis -analyze 2>&1 | FileCheck %s +; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print" 2>&1 | FileCheck %s +; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print,verify" 2>&1 | FileCheck %s +; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print,verify" -stats 2>&1 | FileCheck %s -check-prefix=STATS + +; Tests printing, verification, and statistics functionality for invariant +; range analysis. + +; CHECK: Invariant Info for function: simple_range +; CHECK-NEXT: %i = alloca i32 : +; CHECK-NEXT: invariant_start in block: entry +; CHECK-NEXT: invariant_end in block: entry + +; CHECK: Invariant Info for function: multi_branch_ranges +; CHECK-NEXT: %i = alloca i32 : +; CHECK-NEXT: invariant_start in block: entry +; CHECK-NEXT: invariant_end in block: next +; CHECK-NEXT: invariant_start in block: next +; CHECK-NEXT: invariant_end in block: end_same +; CHECK-NEXT: invariant_end in block: same +; CHECK-NEXT: invariant_start in block: end_same +; CHECK-NOT: invariant_end + +; STATS: 4 invariant_info - Number of invariant_end instructions +; STATS: 4 invariant_info - Number of invariant_start instructions + +; A simple single range. +define void @simple_range() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @foo(i32* %i) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + call void @foo(i32* %i) + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. +define void @multi_branch_ranges() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + br label %next + +next: + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld2, %ld3 + br i1 %cond, label %same, label %end + +same: + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld5 = load i32, i32* %i + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld6 = load i32, i32* %i + call void @bar(i32 %ld6) + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld7 = load i32, i32* %i + call void @bar(i32 %ld7) + br label %end + +end: + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/verify.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/verify.ll @@ -0,0 +1,79 @@ +; RUN: opt < %s -invariant-range-analysis -disable-output -verify + +; Checks the validity of our uses of invariant_start/end pairs, +; based on the legacy pass manager. + +; A simple single range. +define void @simple_range() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @foo(i32* %i) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + call void @foo(i32* %i) + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. +define void @multi_branch_ranges() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + br label %next + +next: + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld2, %ld3 + br i1 %cond, label %same, label %end + +same: + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld5 = load i32, i32* %i + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld6 = load i32, i32* %i + call void @bar(i32 %ld6) + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld7 = load i32, i32* %i + call void @bar(i32 %ld7) + br label %end + +end: + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) +