Index: include/llvm/Analysis/BasicAliasAnalysis.h =================================================================== --- include/llvm/Analysis/BasicAliasAnalysis.h +++ include/llvm/Analysis/BasicAliasAnalysis.h @@ -29,6 +29,7 @@ namespace llvm { class AssumptionCache; class DominatorTree; +struct InvariantInfo; class LoopInfo; /// This is the AA result object for the basic, local, and stateless alias @@ -43,44 +44,45 @@ const TargetLibraryInfo &TLI; AssumptionCache &AC; DominatorTree *DT; + InvariantInfo *InvRA; LoopInfo *LI; public: - BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, - AssumptionCache &AC, DominatorTree *DT = nullptr, - LoopInfo *LI = nullptr) - : AAResultBase(), DL(DL), TLI(TLI), AC(AC), DT(DT), LI(LI) {} - - BasicAAResult(const BasicAAResult &Arg) - : AAResultBase(Arg), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), - LI(Arg.LI) {} + BasicAAResult(const DataLayout &DL, const TargetLibraryInfo &TLI, + AssumptionCache &AC, DominatorTree *DT = nullptr, + InvariantInfo *InvInfo = nullptr, LoopInfo *LI = nullptr) + : AAResultBase(), DL(DL), TLI(TLI), AC(AC), DT(DT), InvRA(InvInfo), LI(LI) {} + + BasicAAResult(const BasicAAResult &Arg) + : AAResultBase(Arg), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), + InvRA(Arg.InvRA), LI(Arg.LI) {} BasicAAResult(BasicAAResult &&Arg) : AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC), - DT(Arg.DT), LI(Arg.LI) {} + DT(Arg.DT), InvRA(Arg.InvRA), LI(Arg.LI) {} - /// Handle invalidation events from the new pass manager. - /// - /// By definition, this result is stateless and so remains valid. + /// Handle invalidation events from the new pass manager. + /// + /// By definition, this result is stateless and so remains valid. 
+ bool invalidate(Function &, const PreservedAnalyses &) { return false; } - AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); + AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB); - ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); + ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc); - ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); + ModRefInfo getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2); - /// Chases pointers until we find a (constant global) or not. - bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); + /// Chases pointers until we find a (constant global) or not. + bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal); - /// Get the location associated with a pointer argument of a callsite. - ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); + /// Get the location associated with a pointer argument of a callsite. + ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx); - /// Returns the behavior when calling the given call site. - FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); + /// Returns the behavior when calling the given call site. + FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS); - /// Returns the behavior when calling the given function. For use when the - /// call site is not known. - FunctionModRefBehavior getModRefBehavior(const Function *F); + /// Returns the behavior when calling the given function. For use when the + /// call site is not known. 
+ FunctionModRefBehavior getModRefBehavior(const Function *F); private: // A linear transformation of a Value; this class represents ZExt(SExt(V, Index: include/llvm/Analysis/InvariantInfo.h =================================================================== --- /dev/null +++ include/llvm/Analysis/InvariantInfo.h @@ -0,0 +1,231 @@ +//===-------- llvm/InvariantInfo.h - invariant_start/end info ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_INVARIANTINFO_H +#define LLVM_ANALYSIS_INVARIANTINFO_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Pass.h" +#include "llvm/IR/PassManager.h" + + +namespace llvm { +template +class AnalysisManager; +class DataLayout; +class DominatorTree; +class FunctionPass; +class Instruction; +class IntrinsicInst; +class LoadInst; +class PreservedAnalyses; +struct PostDominatorTree; +class Value; + +/// A data structure to (lazilly) collect and process invariant intrinsic calls; +/// necessary for invariant range analysis via \c InvariantRangeAnalysisPass or +/// \c InvariantInfoAnalysis. 
+struct InvariantInfo { + typedef SmallVector InvListT; + typedef SmallDenseMap InvListMapT; + typedef SmallDenseMap InvListMapMapT; + + private: + /// \brief A mapping of allocated memory pointer values to corresponding + /// invariant_start/end ranges; Keeps track of the functions that the + /// invariant intrinsics are called from, as well as the addresses that the + /// intrinsics are associated with. + InvListMapMapT AllInvariantsMaps; + + /// \brief Keeps track of functions for which invariant intrinsic calls have + /// been collected into /c AllInvariantsMaps. + SmallSet InvariantsComputed; + + /// \brief The dominator and post-dominator trees, the analyses of which are + /// required by invariant range analysis. + DominatorTree *DT; + PostDominatorTree *PDT; + + /// \brief Determines whether range analysis is enabled. If not, it will not + /// be computed (cf. \c queriedFromInvariantRange()). This helps keep the + /// effect of the analysis local to specific passes like GVN. + bool RangeAnalysisIsEnabled; + + public: + InvariantInfo() : DT(nullptr), PDT(nullptr), RangeAnalysisIsEnabled(false) {} + + /// \brief Sets the dominator and post-dominator trees. + void init(DominatorTree *DomTree, PostDominatorTree *PostDomTree) { + // If the dominator and postdominator trees have changed, then + // repeat the analysis. + if (DT != DomTree || PDT != PostDomTree) + InstructionsInInvariantRange.shrink_and_clear(); + DT = DomTree; + PDT = PostDomTree; + } + + /// \brief Enables invariant range analysis. + void EnableInvariantRangeAnalysis(bool DoEnable = true) { + RangeAnalysisIsEnabled = DoEnable; + } + + /// \brief Retrieves the part of the invariants mapping that is associated + /// with the given function. + InvListMapT &getInvariantsMap(const Function &F); + + /// \brief Access the invariants mapping to extract invariant_start/end + /// instructions that are associated with the given address. 
+ InvListT &getInvariants(const Function &F, const Value *Addr); + + /// \brief Adds an entry to the invariants mapping. + void addInvariant(const Function &F, const Value *Addr, + const IntrinsicInst *IStart); + + /// \brief Populates the invariants mapping for the given function. + void populateInfo(const Function &F); + + /// \brief Determines if the given memory location can be modified by the + /// given instruction, based on invariant range analysis. + bool pointsToReadOnlyMemory(const Instruction *I, const MemoryLocation &Loc, + const DataLayout &DL); + + private: + /// \brief Performs invariant range analysis for some given instruction, + /// based on some allocated memory address: + // If the instruction accesses the given address, then this checks whether + /// there is an invariant range (over the address) that the instruction + /// belongs in. + /// NOTE: Range analysis must be enabled for this computation to go through. + bool queriedFromInvariantRange(const Instruction *I, const Value *Addr); + + typedef SmallSet InstListT; + SmallDenseMap InstructionsInInvariantRange; + + /// \brief Determines if the given instruction was previously found to + /// be in an invariant range over the given address; avoids recomputing + /// \c queriedFromInvariantRange(). + /// FIXME: Try to optimize this by keeping track of computed ranges as well, + /// and then checking the relative positions of the instructions... + bool isInInvariantRange(const Value *Addr, const Instruction *I) { + return InstructionsInInvariantRange[Addr].count(I); + } + + /// \brief Indicates that the given instruction has been found to + /// be in an invariant range over the given address. 
+ void markInInvariantRange(const Value *Addr, const Instruction *I) { + InstructionsInInvariantRange[Addr].insert(I); + } + + public: + /// \brief Various helper functionalities for related analysis passes, e.g., + /// \c InvariantInfoAnalysis, \c InvariantInfoPrinterPass, + /// \c InvariantInfoVerifierPass and \c InvariantRangeAnalysisPass (below). + /// @{ + void clear(); + void verify() const; + void verify(const Function &F) const; + void print(raw_ostream &OS) const; + void print(raw_ostream &OS, const Function &F) const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif + /// @} +}; + +/// \brief A function analysis which provides an \c InvariantInfo. +/// +/// This analysis is intended for use with the new pass manager and will vend +/// invariant info for a given function. +class InvariantInfoAnalysis + : public AnalysisInfoMixin { + friend AnalysisInfoMixin; + static char PassID; + + public: + typedef InvariantInfo Result; + + /// \brief Opaque, unique identifier for this analysis pass. + static void *ID() { return (void *)&PassID; } + + /// \brief Provide a name for the analysis for debugging and logging. + static StringRef name() { return "InvariantInfoAnalysis"; } + + InvariantInfoAnalysis() {} + InvariantInfoAnalysis(const InvariantInfoAnalysis &Arg) {} + InvariantInfoAnalysis(InvariantInfoAnalysis &&Arg) {} + InvariantInfoAnalysis &operator=(const InvariantInfoAnalysis &RHS) { + return *this; + } + InvariantInfoAnalysis &operator=(InvariantInfoAnalysis &&RHS) { + return *this; + } + + InvariantInfo run(Function &F, AnalysisManager &AM) { + return InvariantInfo(); + } +}; + +/// \brief Printer pass for the \c InvariantInfoAnalysis results. 
+class InvariantInfoPrinterPass + : public AnalysisInfoMixin { + raw_ostream &OS; + + public: + explicit InvariantInfoPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Function &F, AnalysisManager &AM); + static StringRef name() { return "InvariantInfoPrinterPass"; } +}; + +/// \brief Verifier pass for the \c InvariantInfoAnalysis results. +struct InvariantInfoVerifierPass + : public AnalysisInfoMixin { + PreservedAnalyses run(Function &F, AnalysisManager &AM); + static StringRef name() { return "InvariantInfoVerifierPass"; } +}; + +/// \brief The -invariant-range-analysis pass. +class InvariantRangeAnalysisPass : public ImmutablePass { + InvariantInfo InvInfo; + + public: + static char ID; // Pass identification + explicit InvariantRangeAnalysisPass(); + ~InvariantRangeAnalysisPass() override; + + InvariantInfo &getInvariantInfo() { return InvInfo; } + const InvariantInfo &getInvariantInfo() const { return InvInfo; } + + void InitDependencies(FunctionPass &P); + + void releaseMemory() override { InvInfo.clear(); } + void verifyAnalysis() const override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *) const override; + void dump() const; + bool doFinalization(Module &) override { + verifyAnalysis(); + return false; + } +}; + +/// \brief Check if the given instruction is an invariant_start/end. 
+bool IsInvariantIntrinsic(const IntrinsicInst *const II); + +} // End llvm namespace + +#endif Index: include/llvm/Analysis/PostDominators.h =================================================================== --- include/llvm/Analysis/PostDominators.h +++ include/llvm/Analysis/PostDominators.h @@ -34,6 +34,16 @@ Base::operator=(std::move(static_cast(RHS))); return *this; } + + bool dominates(const Instruction *Def, const Instruction *User) const; + + inline bool dominates(DomTreeNode *A, DomTreeNode *B) const { + return Base::dominates(A, B); + } + + inline bool dominates(const BasicBlock *A, const BasicBlock *B) const { + return Base::dominates(A, B); + } }; /// \brief Analysis pass which computes a \c PostDominatorTree. Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -155,6 +155,7 @@ void initializeInstNamerPass(PassRegistry&); void initializeInternalizePassPass(PassRegistry&); void initializeIntervalPartitionPass(PassRegistry&); +void initializeInvariantRangeAnalysisPassPass(PassRegistry &); void initializeIRTranslatorPass(PassRegistry &); void initializeJumpThreadingPass(PassRegistry&); void initializeLCSSAPass(PassRegistry&); Index: include/llvm/Transforms/Scalar/GVN.h =================================================================== --- include/llvm/Transforms/Scalar/GVN.h +++ include/llvm/Transforms/Scalar/GVN.h @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" @@ -135,7 +136,7 @@ bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT, const TargetLibraryInfo &RunTLI, AAResults &RunAA, - MemoryDependenceResults *RunMD); + MemoryDependenceResults *RunMD, 
InvariantInfo *InvInfo); /// Push a new Value to the LeaderTable onto the list for its value number. void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) { Index: lib/Analysis/BasicAliasAnalysis.cpp =================================================================== --- lib/Analysis/BasicAliasAnalysis.cpp +++ lib/Analysis/BasicAliasAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" @@ -705,6 +706,10 @@ return Alias; } +static bool isInvariantIntrinsic(ImmutableCallSite CS) { + return IsInvariantIntrinsic(dyn_cast<IntrinsicInst>(CS.getInstruction())); +} + /// Checks to see if the specified callsite can clobber the specified memory /// object. /// @@ -780,8 +785,22 @@ if (isAssumeIntrinsic(CS)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. + if (isInvariantIntrinsic(CS)) return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. - return AAResultBase::getModRefInfo(CS, Loc); + ModRefInfo Mask = AAResultBase::getModRefInfo(CS, Loc); + + // If available, use invariant range analysis to determine if this call could + // modify the memory location. + // NOTE: Only call instructions for now. + // FIXME: Allow all call sites. + if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction())) { + if ((Mask & MRI_Mod) && InvRA && InvRA->pointsToReadOnlyMemory(CI, Loc, DL)) + Mask = ModRefInfo(Mask & ~MRI_Mod); + } + + return Mask; } ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, @@ -792,6 +811,10 @@ if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) return MRI_NoModRef; + // Invariant intrinsics follow the same pattern as assume intrinsic. 
+ if (isInvariantIntrinsic(CS1) || isInvariantIntrinsic(CS2)) + return MRI_NoModRef; + // The AAResultBase base class has some smarts, lets use them. return AAResultBase::getModRefInfo(CS1, CS2); } @@ -1613,6 +1636,7 @@ AM.getResult<TargetLibraryAnalysis>(F), AM.getResult<AssumptionAnalysis>(F), &AM.getResult<DominatorTreeAnalysis>(F), + AM.getCachedResult<InvariantInfoAnalysis>(F), AM.getCachedResult<LoopAnalysis>(F)); } @@ -1639,12 +1663,14 @@ auto &ACT = getAnalysis<AssumptionCacheTracker>(); auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); auto &DTWP = getAnalysis<DominatorTreeWrapperPass>(); + auto *IRAP = getAnalysisIfAvailable<InvariantRangeAnalysisPass>(); auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), - ACT.getAssumptionCache(F), &DTWP.getDomTree(), + ACT.getAssumptionCache(F), + &DTWP.getDomTree(), + IRAP ? &IRAP->getInvariantInfo() : nullptr, LIWP ? &LIWP->getLoopInfo() : nullptr)); - return false; } @@ -1656,8 +1682,12 @@ } BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { + // TODO: Add and use createInvariantRangeAnalysisPass()? + auto *InvRA = P.getAnalysisIfAvailable<InvariantRangeAnalysisPass>(); return BasicAAResult( F.getParent()->getDataLayout(), P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), - P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); + P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), + /*DT = */ nullptr, + InvRA ? &InvRA->getInvariantInfo() : nullptr); } Index: lib/Analysis/CMakeLists.txt =================================================================== --- lib/Analysis/CMakeLists.txt +++ lib/Analysis/CMakeLists.txt @@ -33,6 +33,7 @@ InstructionSimplify.cpp Interval.cpp IntervalPartition.cpp + InvariantInfo.cpp IteratedDominanceFrontier.cpp LazyCallGraph.cpp LazyValueInfo.cpp Index: lib/Analysis/InvariantInfo.cpp =================================================================== --- /dev/null +++ lib/Analysis/InvariantInfo.cpp @@ -0,0 +1,406 @@ +//===---------- InvariantInfo.cpp - invariant_start/end info --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file defines properties for handling invariant_start/end instructions +// for purposes of load elimination. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InvariantInfo.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/PassManager.h" +using namespace llvm; + +// -stats data: +#define DEBUG_TYPE "invariant_info" +STATISTIC(NumInvStart, "Number of invariant_start instructions"); +STATISTIC(NumInvEnd, "Number of invariant_end instructions"); +STATISTIC(NumInvRangeReq, "Number of invariant range analysis requested"); +STATISTIC(NumInvRangeComp, "Number of invariant range analysis computed"); + +InvariantInfo::InvListMapT &InvariantInfo::getInvariantsMap(const Function &F) { + if (!InvariantsComputed.count(&F)) populateInfo(F); + return AllInvariantsMaps[&F]; +} + +InvariantInfo::InvListT &InvariantInfo::getInvariants(const Function &F, + const Value *Addr) { + assert(Addr && "Address must be nonnull."); + + // Retrieve the value from the markers map. + return getInvariantsMap(F)[Addr]; +} + +void InvariantInfo::addInvariant(const Function &F, const Value *Addr, + const IntrinsicInst *II) { + assert(Addr && II && "Value must be nonnull."); + + // Only mark either non-constant global variables or alloca instructions. 
+ if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) return; + } else if (!isa(Addr)) + return; + + InvListT &IIs = getInvariantsMap(F)[Addr]; + IIs.push_back(II); + + // Update -stats data + ++NumInvStart; + NumInvEnd += II->getNumUses(); +} + +void InvariantInfo::populateInfo(const Function &F) { + if (InvariantsComputed.count(&F)) return; + + // Mark the map entry as computed. + InvariantsComputed.insert(&F); + + assert(getInvariantsMap(F).empty() && + "Already have invariant info when scanning!"); + + // Go through all instructions in all blocks, add all invariant_start calls + // to this map. There is no need to explicitly collect invariant_end calls + // because they are uses of the collected invariant_start calls. + for (const BasicBlock &B : F) { + for (const Instruction &I : B) { + if (const IntrinsicInst *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::invariant_start) { + Value *Addr = II->getArgOperand(1)->stripPointerCasts(); + addInvariant(F, Addr, II); + } + } + } + } +} + +/// \brief An instruction cannot modify memory if the instruction is in an +/// invariant range over the memory's pointer value. +/// This implementation is modeled after \c pointsToConstantMemory(), with +/// OrLocal == false, and uses \c queriedFromInvariantRange() to check if +/// the given instruction modify the given memory. +/// In practice, this function should be called immediately after +/// \c pointsToConstantMemory() (with OrLocal == false) returns false. +bool InvariantInfo::pointsToReadOnlyMemory(const Instruction *I, + const MemoryLocation &Loc, + const DataLayout &DL) { + // Proceed only if invariant range analysis is enabled. Otherwise, + // this just repeats the same process from pointsToConstantMemory(). 
+ if (!RangeAnalysisIsEnabled) return false; + + SmallPtrSet Visited; + unsigned MaxLookup = 8; + SmallVector Worklist; + Worklist.push_back(Loc.Ptr); + do { + const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL); + if (!Visited.insert(V).second) return false; + + if (queriedFromInvariantRange(I, V)) continue; + + if (const GlobalVariable *GV = dyn_cast(V)) { + if (GV->isConstant()) continue; + return false; + } + + // If both select values point to readonly memory, then so does the select. + if (const SelectInst *SI = dyn_cast(V)) { + Worklist.push_back(SI->getTrueValue()); + Worklist.push_back(SI->getFalseValue()); + continue; + } + + // If all values incoming to a phi node point to readonly memory, then so + // does the phi. + if (const PHINode *PN = dyn_cast(V)) { + // Don't bother inspecting phi nodes with many operands. + if (PN->getNumIncomingValues() > MaxLookup) return false; + for (Value *IncValue : PN->incoming_values()) + Worklist.push_back(IncValue); + continue; + } + + // Otherwise be conservative. + return false; + + } while (!Worklist.empty() && --MaxLookup); + + return Worklist.empty(); +} + +/// \brief Match the instruction accessing the given address against +/// invariant_start calls in the map. Then, traverse the calls to determine +/// whether the intruction is dominated by an invariant_start such that either: +/// * the invariant_start has no associated invariant_end, or +/// * an associated invariant_end post-dominates the instruction. +/// Returns true if such an invariant_start is found. +/// NOTE: Invariant_start/end calls nested within other invariant ranges +/// are considered redundant in this analysis. +/// TODO: For global variables, also consider invariant info from global +/// constructors. +/// NOTE: The invariants mapping only processes non-constant global variables +/// or alloca instructions. So, this function returns false for those +/// uncovered kinds of memory pointer values. 
+bool InvariantInfo::queriedFromInvariantRange(const Instruction *I, + const Value *Addr) { + assert(RangeAnalysisIsEnabled && "Invariant range analysis must be enabled."); + assert(I && Addr && "No analysis without an instruction or an address."); + assert(DT && PDT && + "Both the dominator and the post-dominator tree" + "analyzes are needed for this analysis."); + + // Not designed for constant globals. + if (const GlobalVariable *GV = dyn_cast(Addr)) { + if (GV->isConstant()) return false; + } + + // Also not designed for non-alloca instructions. + if (!isa(Addr)) return false; + + // Update -stats data + ++NumInvRangeReq; + + // Avoid recomputing this range analysis. + if (isInInvariantRange(Addr, I)) return true; + + const Function *F = I->getFunction(); + InvListT &IIs = getInvariants(*F, Addr); + + // Update -stats data + // If there are no invariants to process, then this is a trivial request + // that should not be accounted for in the stats. + if (IIs.empty()) + --NumInvRangeReq; + else + ++NumInvRangeComp; + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const IntrinsicInst *IStart = *InvsIter; + + // If this invariant_start dominates the instruction, then check if any + // of its associated invariant_ends post-dominates the instruction. + // Otherwise, skip to the next invariant_start. + if (!DT->dominates(IStart, I)) continue; + + for (auto *U : IStart->users()) { + if (const IntrinsicInst *IEnd = dyn_cast(U)) { + assert(IEnd->getIntrinsicID() == Intrinsic::invariant_end && + "Intrinsic user of invairant_start must be intrinsic_end."); + if (PDT->dominates(IEnd, I)) { + markInInvariantRange(Addr, I); + return true; + } + } + + // Skip non-intrinsic uses of invariant_start because they can be either + // select instructions or phi-nodes, which would require any intrinsic_end + // to be in scope. 
+ } + + // If this invariant_start has no user, then this range is invariant + // throughout the end of the lifetime of the underlying object. + if (!IStart->getNumUses()) { + markInInvariantRange(Addr, I); + return true; + } + } + + // Otherwise, the instruction does not belong to an invariant range. + return false; +} + +void InvariantInfo::clear() { + AllInvariantsMaps.shrink_and_clear(); + InvariantsComputed.clear(); + DT = nullptr; + PDT = nullptr; + RangeAnalysisIsEnabled = false; + InstructionsInInvariantRange.shrink_and_clear(); +} + +void InvariantInfo::verify() const { + for (auto InvsMapsIter = AllInvariantsMaps.begin(), + InvsMapsEnd = AllInvariantsMaps.end(); + InvsMapsIter != InvsMapsEnd; ++InvsMapsIter) { + const Function &F = *(InvsMapsIter->first); + verify(F); + } +} + +void InvariantInfo::verify(const Function &F) const { + InvListMapT &InvsMap = + const_cast(this)->AllInvariantsMaps[&F]; + + for (auto InvsMapIter = InvsMap.begin(), InvsMapEnd = InvsMap.end(); + InvsMapIter != InvsMapEnd; ++InvsMapIter) { + auto *Addr = InvsMapIter->first; + InvListT &IIs = InvsMapIter->second; + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const IntrinsicInst *II = *InvsIter; + + assert(II->getIntrinsicID() == Intrinsic::invariant_start && + "Only invariant_start calls are collected."); + assert(Addr == II->getArgOperand(1)->stripPointerCasts() && + "Address mismatch in invariant_start."); + + const GlobalVariable *GV = dyn_cast(Addr); + (void)GV; // Avoid unused warnings. 
+ assert((isa(Addr) || (GV && !GV->isConstant())) && + "A matching entry in the map must correspond to " + "a non-constant global variable or an alloca instruction."); + + if (isa(Addr)) + assert(II->getFunction() == cast(Addr)->getFunction() && + "Invariant_start calls on a given alloca instruction must have" + "the same parent function as the alloca instruction."); + + for (auto *U : II->users()) { + if (const IntrinsicInst *IEnd = dyn_cast(U)) { + (void)IEnd; // Avoid unused warnings. + assert(IEnd->getIntrinsicID() == Intrinsic::invariant_end && + "Only invariant_end calls are intrinsic uses of " + "invariant_start calls."); + assert(II == IEnd->getArgOperand(0)->stripPointerCasts() && + "invariant_start/end."); + assert(IEnd->getArgOperand(2)->stripPointerCasts() == + II->getArgOperand(1)->stripPointerCasts() && + "Address mismatch in invariant_end."); + + if (isa(Addr)) + assert(II->getFunction() == IEnd->getFunction() && + "Invariant_end calls on a given alloca instruction must have" + "the same parent function as its invariant_start."); + } else { + assert((isa(U) || isa(U)) && + "Non-intrinsic uses of invariant_start must be either " + "select instructions or phi-nodes."); + } + } + } + } +} + +void InvariantInfo::print(raw_ostream &OS) const { + for (auto InvsMapsIter = AllInvariantsMaps.begin(), + InvsMapsEnd = AllInvariantsMaps.end(); + InvsMapsIter != InvsMapsEnd; ++InvsMapsIter) { + const Function &F = *(InvsMapsIter->first); + print(OS, F); + } +} + +void InvariantInfo::print(raw_ostream &OS, const Function &F) const { + OS << "Invariant Info for function: " << F.getName() << "\n"; + InvListMapT &InvsMap = const_cast(this)->getInvariantsMap(F); + + for (auto InvsMapIter = InvsMap.begin(), InvsMapEnd = InvsMap.end(); + InvsMapIter != InvsMapEnd; ++InvsMapIter) { + InvListT &IIs = InvsMapIter->second; + OS << *(InvsMapIter->first) << " : \n"; + + for (auto InvsIter = IIs.begin(), InvsEnd = IIs.end(); InvsIter != InvsEnd; + ++InvsIter) { + const 
IntrinsicInst *II = *InvsIter; + OS << " invariant_start in block: " << II->getParent()->getName() + << "\n"; + + for (auto *U : II->users()) { + if (const IntrinsicInst *IEnd = dyn_cast(U)) { + OS << " invariant_end in block: " << IEnd->getParent()->getName() + << "\n"; + } + } + } + } + + OS << "\n"; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void InvariantInfo::dump() const { print(dbgs()); } +#endif + +char InvariantInfoAnalysis::PassID; + +PreservedAnalyses InvariantInfoPrinterPass::run(Function &F, + FunctionAnalysisManager &AM) { + InvariantInfo &InvInfo = AM.getResult(F); + InvInfo.print(OS, F); + return PreservedAnalyses::all(); +} + +PreservedAnalyses InvariantInfoVerifierPass::run(Function &F, + FunctionAnalysisManager &AM) { + InvariantInfo &InvInfo = AM.getResult(F); + InvInfo.verify(F); + return PreservedAnalyses::all(); +} + +InvariantRangeAnalysisPass::InvariantRangeAnalysisPass() : ImmutablePass(ID) { + initializeInvariantRangeAnalysisPassPass(*PassRegistry::getPassRegistry()); +} + +InvariantRangeAnalysisPass::~InvariantRangeAnalysisPass() { } + +/// This invariant range analysis is expected to run over some given function, based +/// on both dominator and post-dominator analysis information. +/// This function initializes both analysis passes for this analysis pass. +void InvariantRangeAnalysisPass::InitDependencies(FunctionPass &P) { + auto &DT = P.getAnalysis(); + auto &PDT = P.getAnalysis(); + InvInfo.init(&DT.getDomTree(), &PDT.getPostDomTree()); +} + +void InvariantRangeAnalysisPass::verifyAnalysis() const { InvInfo.verify(); } + +void InvariantRangeAnalysisPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); +} + +/// \brief If a Module object is provided, then print the current content of the +/// invariants map. Otherwise, print the mappings specific to the current +/// function, recomputing the mappings as necessary. 
+void InvariantRangeAnalysisPass::print(raw_ostream &OS, const Module *M) const { + if (!M) + InvInfo.print(OS); + else { + // If a module is explicitly provided, e.g. with -analyze, then dump all + // the functions in the module (that are not declarations), populating the + // invariant maps as necessary. + for (const Function &F : M->functions()) { + if (!F.isDeclaration()) + InvInfo.print(OS, F); + } + } +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void InvariantRangeAnalysisPass::dump() const { InvInfo.dump(); } +#endif + +char InvariantRangeAnalysisPass::ID = 0; + +INITIALIZE_PASS(InvariantRangeAnalysisPass, "invariant-range-analysis", + "Invariant_start/end analysis", false, true) + +bool llvm::IsInvariantIntrinsic(const IntrinsicInst *II) { + return II && (II->getIntrinsicID() == Intrinsic::invariant_start || + II->getIntrinsicID() == Intrinsic::invariant_end); +} Index: lib/Analysis/MemoryDependenceAnalysis.cpp =================================================================== --- lib/Analysis/MemoryDependenceAnalysis.cpp +++ lib/Analysis/MemoryDependenceAnalysis.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/InvariantInfo.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/OrderedBasicBlock.h" @@ -469,11 +470,15 @@ while (ScanIt != BB->begin()) { Instruction *Inst = &*--ScanIt; - if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { // Debug intrinsics don't (and can't) cause dependencies. if (isa<DbgInfoIntrinsic>(II)) continue; + // Same for invariant intrinsics + if (IsInvariantIntrinsic(II)) continue; + } + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. 
     --Limit;
Index: lib/Analysis/PostDominators.cpp
===================================================================
--- lib/Analysis/PostDominators.cpp
+++ lib/Analysis/PostDominators.cpp
@@ -27,6 +27,60 @@
 //  PostDominatorTree Implementation
 //===----------------------------------------------------------------------===//
 
+// dominates - Return true if Def post-dominates a use in User. This performs
+// the special checks necessary if Def and User are in the same basic block.
+// Note that Def doesn't post-dominate a use in Def itself!
+bool PostDominatorTree::dominates(const Instruction *Def,
+                                  const Instruction *User) const {
+  const BasicBlock *UseBB = User->getParent();
+  const BasicBlock *DefBB = Def->getParent();
+
+  // Any unreachable use is post-dominated, even if Def == User.
+  // NOTE: isReachableFromEntry() here means "can reach exit from ...".
+  if (!isReachableFromEntry(getNode(const_cast<BasicBlock *>(UseBB))))
+    return true;
+
+  // Unreachable definitions don't post-dominate anything.
+  // NOTE: isReachableFromEntry() here means "can reach exit from ...".
+  if (!isReachableFromEntry(getNode(const_cast<BasicBlock *>(DefBB))))
+    return false;
+
+  // An instruction doesn't post-dominate a use in itself.
+  if (Def == User) return false;
+
+  // A PHI node post-dominates an instruction if every possible use in DefBB
+  // post-dominates the instruction.
+  // TODO: Special case for invoke instructions:
+  //       Allow invoke instructions to be post-dominated by instructions in
+  //       their normal destination path to allow further optimizations from
+  //       invariant range analysis such as the elimination of the second load
+  //       instruction in
+  //
+  //
+  //       try { ...
+  //             @llvm.invariant.start(..., bitcast i32* %i to i8*)
+  //             load %i
+  //             invoke(...)
+  //             load %i
+  //             ...
+  //             @llvm.invariant.end(..., bitcast i32* %i to i8*)
+  //       } catch (...) { ...
+  //       }
+  //
+  //
+  if (isa<InvokeInst>(Def)) {
+    if (DefBB == UseBB) return false;
+    return dominates(DefBB, UseBB);
+  }
+
+  if (DefBB != UseBB) return dominates(DefBB, UseBB);
+
+  // Loop through the basic block, in reverse order, until we find Def or User.
+  BasicBlock::const_reverse_iterator I = DefBB->rbegin();
+  for (; &*I != Def && &*I != User; ++I) /*empty*/;
+
+  return &*I == Def;
+}
+
 char PostDominatorTreeWrapperPass::ID = 0;
 INITIALIZE_PASS(PostDominatorTreeWrapperPass, "postdomtree",
                 "Post-Dominator Tree Construction", true, true)
Index: lib/Passes/PassBuilder.cpp
===================================================================
--- lib/Passes/PassBuilder.cpp
+++ lib/Passes/PassBuilder.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/DominanceFrontier.h"
 #include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InvariantInfo.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
Index: lib/Passes/PassRegistry.def
===================================================================
--- lib/Passes/PassRegistry.def
+++ lib/Passes/PassRegistry.def
@@ -67,6 +67,7 @@
 FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
 FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
 FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
+FUNCTION_ANALYSIS("invariant-info", InvariantInfoAnalysis())
 FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
 FUNCTION_ANALYSIS("loops", LoopAnalysis())
 FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
@@ -104,6 +105,7 @@
 FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
 FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
 FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<invariant-info>", InvariantInfoPrinterPass(dbgs()))
 FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
 FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
 FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
@@ -113,6 +115,7 @@
 FUNCTION_PASS("verify", VerifierPass())
 FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
 FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
+FUNCTION_PASS("verify<invariant-info>", InvariantInfoVerifierPass())
 #undef FUNCTION_PASS
 
 #ifndef LOOP_ANALYSIS
Index: lib/Transforms/InstCombine/InstructionCombining.cpp
===================================================================
--- lib/Transforms/InstCombine/InstructionCombining.cpp
+++ lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1978,6 +1978,16 @@
                                              C->isFalseWhenEqual()));
     } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
       replaceInstUsesWith(*I, UndefValue::get(I->getType()));
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+      if (II->getIntrinsicID() == Intrinsic::objectsize) {
+        ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
+        uint64_t DontKnow = CI->isZero() ? -1ULL : 0;
+        replaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow));
+      }
+
+      // If this is an invariant start, mark it as unused before erasing it.
+      if (II->getIntrinsicID() == Intrinsic::invariant_start)
+        replaceInstUsesWith(*I, UndefValue::get(I->getType()));
     }
     eraseInstFromFunction(*I);
   }
Index: lib/Transforms/Scalar/GVN.cpp
===================================================================
--- lib/Transforms/Scalar/GVN.cpp
+++ lib/Transforms/Scalar/GVN.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/PostDominators.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DataLayout.h"
@@ -594,7 +595,8 @@
   auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
   auto &AA = AM.getResult<AAManager>(F);
   auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
-  bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep);
+  auto *InvInfo = AM.getCachedResult<InvariantInfoAnalysis>(F);
+  bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, InvInfo);
   return Changed ?
PreservedAnalyses::none() : PreservedAnalyses::all();
 }
@@ -2161,7 +2163,12 @@
 /// runOnFunction - This is the main transformation entry point for a function.
 bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
                   const TargetLibraryInfo &RunTLI, AAResults &RunAA,
-                  MemoryDependenceResults *RunMD) {
+                  MemoryDependenceResults *RunMD, InvariantInfo *InvInfo) {
+
+  // Enable invariant range analysis here to keep its effect local to GVN.
+  if (InvInfo)
+    InvInfo->EnableInvariantRangeAnalysis();
+
   AC = &RunAC;
   DT = &RunDT;
   VN.setDomTree(DT);
@@ -2214,6 +2221,10 @@
   // iteration.
   DeadBlocks.clear();
 
+  // Disable invariant range analysis when finished with this pass.
+  if (InvInfo)
+    InvInfo->EnableInvariantRangeAnalysis(false);
+
   return Changed;
 }
 
@@ -2677,21 +2688,33 @@
     if (skipOptnoneFunction(F))
       return false;
 
+    if (!NoLoads) {
+      // Once enabled, initialize the analysis' dependencies.
+      auto &InvRA = getAnalysis<InvariantRangeAnalysisPass>();
+      InvRA.InitDependencies(*this);
+    }
+
     return Impl.runImpl(
         F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
         getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
         getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
         getAnalysis<AAResultsWrapperPass>().getAAResults(),
         NoLoads ? nullptr
                 : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
        NoLoads ?
 nullptr
+                : &getAnalysis<InvariantRangeAnalysisPass>().getInvariantInfo());
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<AssumptionCacheTracker>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<TargetLibraryInfoWrapperPass>();
-    if (!NoLoads)
+    if (!NoLoads) {
       AU.addRequired<MemoryDependenceWrapperPass>();
+      AU.addRequired<PostDominatorTreeWrapperPass>();
+      AU.addRequired<InvariantRangeAnalysisPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+    }
 
     AU.addRequired<AAResultsWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
@@ -2713,7 +2736,9 @@
 INITIALIZE_PASS_BEGIN(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
 INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(InvariantRangeAnalysisPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
Index: test/Analysis/InvariantInfo/gvn-basic.ll
===================================================================
--- /dev/null
+++ test/Analysis/InvariantInfo/gvn-basic.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -S -gvn 2>&1 | FileCheck %s
+; RUN: opt < %s -gvn -stats 2>&1 | FileCheck %s -check-prefix=STATS
+
+; Tests elementary load elimination using call instructions as potential
+; clobbers. Also checks statistical data generation.
+; NOTE: "-gvn" expands to +; "-targetlibinfo -tti -assumption-cache-tracker -domtree " +; "-postdomtree -invariant-range-analysis -basicaa -aa -memdep " +; "-gvn -verify" + +; STATS: 2 gvn - Number of blocks merged +; STATS: 2 gvn - Number of equalities propagated +; STATS: 6 gvn - Number of instructions deleted +; STATS: 6 gvn - Number of loads deleted +; STATS: 18 invariant_info - Number of invariant range analysis computed +; STATS: 21 invariant_info - Number of invariant range analysis requested +; STATS: 4 invariant_info - Number of invariant_end instructions +; STATS: 4 invariant_info - Number of invariant_start instructions +; STATS: 1 memdep - Number of block queries that were completely cached +; STATS: 1 memdep - Number of uncached non-local ptr responses + +; A simple single range. +define void @simple_range() { +; CHECK: define void @simple_range() +entry: + %i = alloca i32 + call void @foo(i32* %i) ; #1 + + %bi = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %i ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i) ; #2 + %ld2 = load i32, i32* %i ; Not clobbered by #2; Merged into %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i) ; #3 + + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %i ; Not clobbered by #3, nor #2; Merged into %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i) ; #4 + %ld4 = load i32, i32* %i ; Clobbered by #4; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. 
+define void @multi_branch_ranges() { +; CHECK: define void @multi_branch_ranges() +entry: + %i = alloca i32 + call void @foo(i32* %i) ; #1 + + %bi = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #2 + br label %next + +next: + %ld1 = load i32, i32* %i ; Not clobbered by #2; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld2 = load i32, i32* %i ; Merged into %2. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld2) + + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #3 + %ld3 = load i32, i32* %i ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld1, %ld3 + br i1 %cond, label %same, label %end + +same: + call void @foo(i32* %i) ; #4 + %ld4 = load i32, i32* %i ; Not clobbered by #4; Merged into %ld3 == %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %bi) ; Redundant -- since nested within another invariant range. + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #5 + %ld5 = load i32, i32* %i ; Not clobbered by #5; Merged into %ld3 == %ld1. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %bi) ; invariant_start %inv2 ends here. + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #6 + %ld6 = load i32, i32* %i ; Clobbered by #6; Unchanged. 
+ ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + call void @foo(i32* %i) ; #7 + %ld7 = load i32, i32* %i ; Not clobbered by #7; Merged into %ld6. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld7) + br label %end + +end: + call void @foo(i32* %i) ; #8 + %ld8 = load i32, i32* %i ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/gvn.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/gvn.ll @@ -0,0 +1,158 @@ +; RUN: opt < %s -gvn -S | FileCheck %s + +; Tests elementary load elimination using instructions other than call +; instructions as potential clobbers. + +; select instructions: +; Loading from a select instruction is clobbered by any instruction that +; clobbers at least one of the select values. It is not clobbered by +; instructions that do not clobber any of the select values. +define void @select() { +; CHECK: define void @select() +entry: + %i = alloca i32 + %j = alloca i32 + %cond = call i1 @condition(); + %ij = select i1 %cond, i32* %i, i32* %j + call void @foo(i32* %i, i32* %j) ; #1 + + %bi = bitcast i32* %i to i8* + %invi = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %ij ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i, i32* %j) ; #2 + %ld2 = load i32, i32* %ij ; Clobbered by #2; Unchanged. 
+ ; CHECK: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i, i32* %j) ; #3 + + %bj = bitcast i32* %j to i8* + %invj = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %ij ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i, i32* %j) ; #4 + %ld4 = load i32, i32* %ij ; Not clobbered by #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + call void @foo(i32* %i, i32* %j) ; #5 + + call void @llvm.invariant.end({}* %invj, i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld5 = load i32, i32* %ij ; Not clobbered by #5, nor #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + call void @foo(i32* %i, i32* %j) ; #6 + %ld6 = load i32, i32* %ij ; Clobbered by #6; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + call void @foo(i32* %i, i32* %j) ; #7 + + call void @llvm.invariant.end({}* %invi, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld7 = load i32, i32* %ij ; Clobbered by #7; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld7) + call void @foo(i32* %i, i32* %j) ; #8 + %ld8 = load i32, i32* %ij ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + + ret void +} + +; PHI nodes: +; Loading from a phi node is clobbered by any instruction that clobbers +; at least one of the incoming values. It is not clobbered by instructions +; that do not clobber any of the incoming values. +define void @phi_node() { +; CHECK: define void @phi_node() +entry: + %i = alloca i32 + %j = alloca i32 + %cond = call i1 @condition(); + br i1 %cond, label %iside, label %jside + +iside: ; Some fillers... + call void @foo(i32* %i, i32* %j) + br label %both + +jside: ; Some fillers... 
+ call void @foo(i32* %i, i32* %j) + %ldj = load i32, i32* %j + call void @bar(i32 %ldj) + call void @foo(i32* %i, i32* %j) + br label %both + +both: ; The rest is similar to select() above. + %ij = phi i32* [ %i, %iside ], [ %j, %jside ] + call void @foo(i32* %i, i32* %j) ; #1 + + %bi = bitcast i32* %i to i8* + %invi = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %ij ; Clobbered by #1; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld1) + call void @foo(i32* %i, i32* %j) ; #2 + %ld2 = load i32, i32* %ij ; Clobbered by #2; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld2) + call void @foo(i32* %i, i32* %j) ; #3 + + %bj = bitcast i32* %j to i8* + %invj = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld3 = load i32, i32* %ij ; Clobbered by #3; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld3) + call void @foo(i32* %i, i32* %j) ; #4 + %ld4 = load i32, i32* %ij ; Not clobbered by #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld4) + call void @foo(i32* %i, i32* %j) ; #5 + + call void @llvm.invariant.end({}* %invj, i64 4, i8* %bj) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld5 = load i32, i32* %ij ; Not clobbered by #5, nor #4; Merged into %ld3. + ; CHECK-NOT: load i32, i32* + call void @bar(i32 %ld5) + call void @foo(i32* %i, i32* %j) ; #6 + %ld6 = load i32, i32* %ij ; Clobbered by #6; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld6) + call void @foo(i32* %i, i32* %j) ; #7 + + call void @llvm.invariant.end({}* %invi, i64 4, i8* %bi) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + %ld7 = load i32, i32* %ij ; Clobbered by #7; Unchanged. 
+ ; CHECK: load i32, i32* + call void @bar(i32 %ld7) + call void @foo(i32* %i, i32* %j) ; #8 + %ld8 = load i32, i32* %ij ; Clobbered by #8; Unchanged. + ; CHECK: load i32, i32* + call void @bar(i32 %ld8) + + ret void +} + +; TODO: Test other instructions that can modify memory. +; Examples: +; define void @invoke() { ... } +; define void @store() { ... } +; define void @vaarg() { ... } +; define void @catchpad() { ... } +; define void @catchreturn() { ... } +; ... +; define void @globalvar() { ... } +; ... + +declare i1 @condition() +declare void @bar(i32) readonly +declare void @foo(i32*, i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/instcombine.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/instcombine.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +; Do not remove intrinsic calls after merging uses of defined values. +define void @merged_def() { +; CHECK: define void @merged_def +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv0 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + ; CHECK: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8* + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @bar(i32 %ld1) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + ; CHECK: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8* + ret void +} + +; Remove intrinsic calls after merging uses of undefined values. 
+define void @merged_undef() {
+; CHECK: define void @merged_undef
+entry:
+  %i = alloca i32
+  %0 = bitcast i32* %i to i8*
+  %inv0 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0)
+  ; CHECK-NOT: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
+  %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0)
+  ; CHECK-NOT: call {{.*}}@llvm.invariant.start(i64 {{[0-9]+}}, i8*
+  call void @bar(i32 undef)
+  call void @bar(i32 undef)
+  call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0)
+  ; CHECK-NOT: call {{.*}}@llvm.invariant.end({{.*}}, i64 {{[0-9]+}}, i8*
+  ret void
+}
+
+declare void @bar(i32) readonly
+declare void @foo(i32*)
+declare {}* @llvm.invariant.start(i64, i8* nocapture)
+declare void @llvm.invariant.end({}*, i64, i8* nocapture)
Index: test/Analysis/InvariantInfo/print.ll
===================================================================
--- /dev/null
+++ test/Analysis/InvariantInfo/print.ll
@@ -0,0 +1,100 @@
+; RUN: opt < %s -invariant-range-analysis -analyze 2>&1 | FileCheck %s
+; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print<invariant-info>" 2>&1 | FileCheck %s
+; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print<invariant-info>,verify<invariant-info>" 2>&1 | FileCheck %s
+; RUN: opt < %s -invariant-range-analysis -disable-output -passes="print<invariant-info>,verify<invariant-info>" -stats 2>&1 | FileCheck %s -check-prefix=STATS
+
+; Tests printing, verification, and statistics functionality for invariant
+; range analysis.
+ +; CHECK: Invariant Info for function: simple_range +; CHECK-NEXT: %i = alloca i32 : +; CHECK-NEXT: invariant_start in block: entry +; CHECK-NEXT: invariant_end in block: entry + +; CHECK: Invariant Info for function: multi_branch_ranges +; CHECK-NEXT: %i = alloca i32 : +; CHECK-NEXT: invariant_start in block: entry +; CHECK-NEXT: invariant_end in block: next +; CHECK-NEXT: invariant_start in block: next +; CHECK-NEXT: invariant_end in block: end_same +; CHECK-NEXT: invariant_end in block: same +; CHECK-NEXT: invariant_start in block: end_same +; CHECK-NOT: invariant_end + +; STATS: 4 invariant_info - Number of invariant_end instructions +; STATS: 4 invariant_info - Number of invariant_start instructions + +; A simple single range. +define void @simple_range() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @foo(i32* %i) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + call void @foo(i32* %i) + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. 
+define void @multi_branch_ranges() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + br label %next + +next: + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld2, %ld3 + br i1 %cond, label %same, label %end + +same: + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld5 = load i32, i32* %i + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld6 = load i32, i32* %i + call void @bar(i32 %ld6) + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld7 = load i32, i32* %i + call void @bar(i32 %ld7) + br label %end + +end: + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) + Index: test/Analysis/InvariantInfo/verify.ll =================================================================== --- /dev/null +++ test/Analysis/InvariantInfo/verify.ll @@ -0,0 +1,100 @@ +; Check the validity of our uses of invariant_start/end pairs, +; based on the legacy pass manager. +; RUN: opt < %s -invariant-range-analysis -disable-output -verify + +; Also check the validity under optimization levels enabled +; (which implicitly enable -invariant-range-analysis). +; RUN: opt < %s -O1 -disable-output -verify +; RUN: opt < %s -O2 -disable-output -verify +; RUN: opt < %s -O3 -disable-output -verify + +; A simple single range. 
+define void @simple_range() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @foo(i32* %i) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + call void @foo(i32* %i) + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + ret void +} + +; Multiple ranges, spanning different branches. +define void @multi_branch_ranges() { +entry: + %i = alloca i32 + call void @foo(i32* %i) + %0 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + br label %next + +next: + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %0) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + %inv2 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld3 = load i32, i32* %i + call void @bar(i32 %ld3) + %cond = icmp eq i32 %ld2, %ld3 + br i1 %cond, label %same, label %end + +same: + %ld4 = load i32, i32* %i + call void @bar(i32 %ld4) + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld5 = load i32, i32* %i + call void @bar(i32 %ld5) + br label %end_same + +end_same: + call void @llvm.invariant.end({}* %inv2, i64 4, i8* %0) + call void @foo(i32* %i) + %ld6 = load i32, i32* %i + call void @bar(i32 %ld6) + %inv3 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + call void @foo(i32* %i) + %ld7 = load i32, i32* %i + call void @bar(i32 %ld7) + br label %end + +end: + ret void +} + +; Remove intrinsic calls after substituting undefined values. 
+define void @undef() { +entry: + %i = alloca i32 + %0 = bitcast i32* %i to i8* + %inv0 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %0) + %1 = bitcast i32* %i to i8* + %inv1 = call {}* (i64, i8*) @llvm.invariant.start(i64 4, i8* %1) + %ld1 = load i32, i32* %i + call void @bar(i32 %ld1) + %ld2 = load i32, i32* %i + call void @bar(i32 %ld2) + call void @llvm.invariant.end({}* %inv1, i64 4, i8* %1) + ret void +} + +declare void @bar(i32) readonly +declare void @foo(i32*) +declare {}* @llvm.invariant.start(i64, i8* nocapture) +declare void @llvm.invariant.end({}*, i64, i8* nocapture) +