Index: include/llvm/Analysis/AliasAnalysis.h =================================================================== --- include/llvm/Analysis/AliasAnalysis.h +++ include/llvm/Analysis/AliasAnalysis.h @@ -145,6 +145,19 @@ Location getLocation(const AtomicRMWInst *RMWI); static Location getLocationForSource(const MemTransferInst *MTI); static Location getLocationForDest(const MemIntrinsic *MI); + Location getLocation(const Instruction *Inst) { + if (auto *I = dyn_cast(Inst)) + return getLocation(I); + else if (auto *I = dyn_cast(Inst)) + return getLocation(I); + else if (auto *I = dyn_cast(Inst)) + return getLocation(I); + else if (auto *I = dyn_cast(Inst)) + return getLocation(I); + else if (auto *I = dyn_cast(Inst)) + return getLocation(I); + llvm_unreachable("unsupported memory instruction"); + } /// Alias analysis result - Either we know for sure that it does not alias, we /// know for sure it must alias, or we don't know anything: The two pointers @@ -352,6 +365,24 @@ return (MRB & ModRef) && (MRB & ArgumentPointees); } + /// getModRefInfo - Return information about whether or not an + /// instruction may read or write memory (without regard to a + /// specific location) + ModRefResult getModRefInfo(const Instruction *I) { + if (isa(I) || isa(I)) { + auto MRB = getModRefBehavior(I); + if (MRB & ModRef) + return ModRef; + else if (MRB & Ref) + return Ref; + else if (MRB & Mod) + return Mod; + return NoModRef; + } + + return getModRefInfo(I, Location()); + } + /// getModRefInfo - Return information about whether or not an instruction may /// read or write the specified memory location. An instruction /// that doesn't read or write memory may be trivially LICM'd for example. 
@@ -472,6 +503,10 @@ ModRefResult getModRefInfo(const VAArgInst* I, const Value* P, uint64_t Size){ return getModRefInfo(I, Location(P, Size)); } + /// instructionClobbersCall - Return whether a given instruction could modify + /// memory used by a given call + virtual bool instructionClobbersCall(Instruction *I, + ImmutableCallSite Call); /// getModRefInfo - Return information about whether two call sites may refer /// to the same set of memory locations. See Index: include/llvm/IR/Function.h =================================================================== --- include/llvm/IR/Function.h +++ include/llvm/IR/Function.h @@ -472,6 +472,10 @@ Constant *getPrologueData() const; void setPrologueData(Constant *PrologueData); + /// Print the function to an output stream with an optional + /// AssemblyAnnotationWriter. + void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr) const; + /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the /// program, displaying the CFG of the current function with the code for each Index: include/llvm/InitializePasses.h =================================================================== --- include/llvm/InitializePasses.h +++ include/llvm/InitializePasses.h @@ -201,6 +201,8 @@ void initializeMemDepPrinterPass(PassRegistry&); void initializeMemDerefPrinterPass(PassRegistry&); void initializeMemoryDependenceAnalysisPass(PassRegistry&); +void initializeMemorySSALazyPass(PassRegistry&); +void initializeMemorySSAPrinterPassPass(PassRegistry&); void initializeMergedLoadStoreMotionPass(PassRegistry &); void initializeMetaRenamerPass(PassRegistry&); void initializeMergeFunctionsPass(PassRegistry&); Index: include/llvm/Transforms/Utils/MemorySSA.h =================================================================== --- /dev/null +++ include/llvm/Transforms/Utils/MemorySSA.h @@ -0,0 +1,594 @@ +//===- MemorySSA.h - Build Memory SSA 
---------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This file exposes an interface to building/using memory SSA to
+// walk memory instructions using a use/def graph.
+//
+// Memory SSA class builds an SSA form that links together memory
+// access instructions such as loads, stores, and clobbers (atomics,
+// calls, etc), so they can be walked easily. Additionally, it does a
+// trivial form of "heap versioning": every time the memory state
+// changes in the program, we generate a new heap version. It generates
+// MemoryDef/Uses/Phis that are overlaid on top of the existing
+// instructions.
+//
+// As a trivial example,
+// define i32 @main() #0 {
+// entry:
+// %call = call noalias i8* @_Znwm(i64 4) #2
+// %0 = bitcast i8* %call to i32*
+// %call1 = call noalias i8* @_Znwm(i64 4) #2
+// %1 = bitcast i8* %call1 to i32*
+// store i32 5, i32* %0, align 4
+// store i32 7, i32* %1, align 4
+// %2 = load i32* %0, align 4
+// %3 = load i32* %1, align 4
+// %add = add nsw i32 %2, %3
+// ret i32 %add
+// }
+// Will become
+// define i32 @main() #0 {
+// entry:
+// ; 1 = MemoryDef(0)
+// %call = call noalias i8* @_Znwm(i64 4) #3
+// %2 = bitcast i8* %call to i32*
+// ; 2 = MemoryDef(1)
+// %call1 = call noalias i8* @_Znwm(i64 4) #3
+// %4 = bitcast i8* %call1 to i32*
+// ; 3 = MemoryDef(2)
+// store i32 5, i32* %2, align 4
+// ; 4 = MemoryDef(3)
+// store i32 7, i32* %4, align 4
+// ; MemoryUse(4)
+// %7 = load i32* %2, align 4
+// ; MemoryUse(4)
+// %8 = load i32* %4, align 4
+// %add = add nsw i32 %7, %8
+// ret i32 %add
+// }
+// Given this form, all the stores that could ever affect the load
+// at %8 can be gotten by using the memory use associated with it,
+// and walking from use to def until you hit the top of the
function. + +// Each def also has a list of uses +// Also note that it does not attempt any disambiguation, it is simply +// linking together the instructions. + +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_MEMORYSSA_H +#define LLVM_TRANSFORMS_UTILS_MEMORYSSA_H +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Dominators.h" +#include "llvm/Pass.h" +#include +namespace llvm { + +class BasicBlock; +class DominatorTree; +class Function; + +// \brief The base for all memory accesses. All memory accesses in a +// block are linked together using an intrusive list. +class MemoryAccess : public ilist_node { +public: + enum AccessType { AccessUse, AccessDef, AccessPhi }; + + // Methods for support type inquiry through isa, cast, and + // dyn_cast + static inline bool classof(const MemoryAccess *) { return true; } + + virtual ~MemoryAccess(); + BasicBlock *getBlock() const { return Block; } + + /// \brief Get the instruction that this MemoryAccess represents. + /// This may be null in the case of phi nodes. + virtual Instruction *getMemoryInst() const = 0; + + /// \brief Set the instruction that this MemoryUse represents. + virtual void setMemoryInst(Instruction *MI) = 0; + + /// \brief Get the access that produces the memory state used by this access. + virtual MemoryAccess *getDefiningAccess() const = 0; + + /// \brief Replace our defining access with a new one. + /// + /// This function updates use lists. 
+ virtual void setDefiningAccess(MemoryAccess *) = 0; + + virtual void print(raw_ostream &OS) const {}; + virtual void dump() const; + + typedef SmallPtrSet UseListType; + typedef UseListType::iterator iterator; + typedef UseListType::const_iterator const_iterator; + + unsigned use_size() const { return Uses.size(); } + bool use_empty() const { return Uses.empty(); } + iterator use_begin() { return Uses.begin(); } + iterator use_end() { return Uses.end(); } + iterator_range uses() { + return iterator_range(use_begin(), use_end()); + } + + const_iterator use_begin() const { return Uses.begin(); } + const_iterator use_end() const { return Uses.end(); } + iterator_range uses() const { + return iterator_range(use_begin(), use_end()); + } + +protected: + friend class MemorySSA; + friend class MemoryUse; + friend class MemoryDef; + friend class MemoryPhi; + AccessType getAccessType() const { return AccessType; } + + /// \brief Add a use of this memory access to our list of uses. + /// + /// Note: We depend on being able to add the same use multiple times and not + /// have it end up in our use list multiple times. + void addUse(MemoryAccess *Use) { Uses.insert(Use); } + + /// \brief Remove a use of this memory access from our list of uses. + void removeUse(MemoryAccess *Use) { Uses.erase(Use); } + + /// \brief Return true if \p Use is one of the uses of this memory access. 
+ bool findUse(MemoryAccess *Use) { return Uses.count(Use); } + + MemoryAccess(AccessType AT, BasicBlock *BB) : AccessType(AT), Block(BB) {} + + /// \brief Used internally to give IDs to MemoryAccesses for printing + virtual unsigned int getID() const { return 0; } + +private: + MemoryAccess(const MemoryAccess &); + void operator=(const MemoryAccess &); + AccessType AccessType; + BasicBlock *Block; + UseListType Uses; +}; + +template <> +struct ilist_traits : public ilist_default_traits { + // See details of the instruction class for why this trick works + MemoryAccess *createSentinel() const { + return static_cast(&Sentinel); + } + + static void destroySentinel(MemoryAccess *) {} + + MemoryAccess *provideInitialHead() const { return createSentinel(); } + MemoryAccess *ensureHead(MemoryAccess *) const { return createSentinel(); } + static void noteHead(MemoryAccess *, MemoryAccess *) {} + +private: + mutable ilist_half_node Sentinel; +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, MemoryAccess &MA) { + MA.print(OS); + return OS; +} + +/// \brief Represents read-only accesses to memory +class MemoryUse : public MemoryAccess { +public: + MemoryUse(MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) + : MemoryUse(DMA, AccessUse, MI, BB) {} + + virtual MemoryAccess *getDefiningAccess() const final { + return DefiningAccess; + } + + virtual void setDefiningAccess(MemoryAccess *DMA) final { + if (DefiningAccess != DMA) { + if (DefiningAccess) + DefiningAccess->removeUse(this); + if (DMA) + DMA->addUse(this); + } + DefiningAccess = DMA; + } + virtual Instruction *getMemoryInst() const final { return MemoryInst; } + + virtual void setMemoryInst(Instruction *MI) final { MemoryInst = MI; } + + static inline bool classof(const MemoryUse *) { return true; } + static inline bool classof(const MemoryAccess *MA) { + return MA->getAccessType() == AccessUse; + } + virtual void print(raw_ostream &OS) const; + +protected: + MemoryUse(MemoryAccess *DMA, enum AccessType 
AT, Instruction *MI, + BasicBlock *BB) + : MemoryAccess(AT, BB), DefiningAccess(nullptr), MemoryInst(MI) { + setDefiningAccess(DMA); + } + +private: + MemoryAccess *DefiningAccess; + Instruction *MemoryInst; +}; + +/// \brief Represents a read-write access to memory, whether it is real, or a +/// clobber. +/// +/// Note that, in order to provide def-def chains, all defs also have a use +/// associated with them. +class MemoryDef final : public MemoryUse { +public: + MemoryDef(MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, unsigned Ver) + : MemoryUse(DMA, AccessDef, MI, BB), ID(Ver) {} + + static inline bool classof(const MemoryDef *) { return true; } + static inline bool classof(const MemoryUse *MA) { + return MA->getAccessType() == AccessDef; + } + + static inline bool classof(const MemoryAccess *MA) { + return MA->getAccessType() == AccessDef; + } + virtual void print(raw_ostream &OS) const; + +protected: + friend class MemorySSA; + // For debugging only. This gets used to give memory accesses pretty numbers + // when printing them out + + virtual unsigned int getID() const final { return ID; } + +private: + const unsigned int ID; +}; + +/// \brief Represents phi nodes for memory accesses. +/// +/// These have the same semantic as regular phi nodes, with the exception that +/// only one phi will ever exist in a given basic block. 
+ +class MemoryPhi final : public MemoryAccess { +public: + MemoryPhi(BasicBlock *BB, unsigned int NP, unsigned int Ver) + : MemoryAccess(AccessPhi, BB), ID(Ver), NumPreds(NP) { + Args.reserve(NumPreds); + } + + virtual Instruction *getMemoryInst() const final { return nullptr; } + + virtual void setMemoryInst(Instruction *MI) final {} + + virtual MemoryAccess *getDefiningAccess() const { + llvm_unreachable("MemoryPhi's do not have a single defining access"); + } + virtual void setDefiningAccess(MemoryAccess *) final { + llvm_unreachable("MemoryPhi's do not have a single defining access"); + } + + /// \brief This is the number of actual predecessors this phi node has. + unsigned int getNumPreds() const { return NumPreds; } + + /// \brief This is the number of incoming values currently in use + /// + /// During SSA construction, we differentiate between this and NumPreds to + /// know when the PHI node is fully constructed. + unsigned int getNumIncomingValues() const { return Args.size(); } + + /// \brief Set the memory access of argument \p v of this phi node to be \p MA + /// + /// This function updates use lists. + void setIncomingValue(unsigned int v, MemoryAccess *MA) { + std::pair &Val = Args[v]; + // We need to update use lists. Because our uses are not to specific + // operands, but instead to this MemoryAccess, and because a given memory + // access may appear multiple times in the phi argument list, we need to be + // careful not to remove the use of this phi, from MA, until we check to + // make sure MA does not appear elsewhere in the phi argument list. 
+ if (Val.second != MA) { + if (Val.second) { + bool existsElsewhere = false; + for (unsigned i = 0, e = Args.size(); i != e; ++i) { + if (i == v) + continue; + if (Args[i].second == Val.second) + existsElsewhere = true; + } + if (!existsElsewhere) + Val.second->removeUse(this); + } + MA->addUse(this); + Val.second = MA; + } + } + + MemoryAccess *getIncomingValue(unsigned int v) const { + return Args[v].second; + } + void setIncomingBlock(unsigned int v, BasicBlock *BB) { + std::pair &Val = Args[v]; + Val.first = BB; + } + BasicBlock *getIncomingBlock(unsigned int v) const { return Args[v].first; } + + typedef SmallVector, 8> ArgsType; + typedef ArgsType::const_iterator const_arg_iterator; + inline const_arg_iterator args_begin() const { return Args.begin(); } + inline const_arg_iterator args_end() const { return Args.end(); } + inline iterator_range args() const { + return iterator_range(args_begin(), args_end()); + } + + static inline bool classof(const MemoryPhi *) { return true; } + static inline bool classof(const MemoryAccess *MA) { + return MA->getAccessType() == AccessPhi; + } + + virtual void print(raw_ostream &OS) const; + +protected: + friend class MemorySSA; + + // MemorySSA currently cannot handle edge additions or deletions (but can + // handle direct replacement). This is protected to ensure people don't try. + void addIncoming(MemoryAccess *MA, BasicBlock *BB) { + Args.push_back(std::make_pair(BB, MA)); + MA->addUse(this); + } + + // For debugging only. This gets used to give memory accesses pretty numbers + // when printing them out + virtual unsigned int getID() const final { return ID; } + +private: + // For debugging only + const unsigned ID; + unsigned NumPreds; + ArgsType Args; +}; + +class MemorySSAWalker; + +/// \brief Encapsulates MemorySSA, including all data associated with memory +/// accesses. 
+class MemorySSA {
+public:
+  MemorySSA(Function &);
+  ~MemorySSA();
+
+  /// \brief Build Memory SSA, and return the walker we used during building,
+  /// for later reuse. If MemorySSA is already built, just return the walker.
+  MemorySSAWalker *buildMemorySSA(AliasAnalysis *, DominatorTree *);
+
+  /// \brief Given a memory using/clobbering/etc instruction, get the MemorySSA
+  /// access associated with it. If passed a basic block gets the memory phi
+  /// node that exists for that block, if there is one.
+  MemoryAccess *getMemoryAccess(const Value *) const;
+  void dump(Function &);
+  void print(raw_ostream &) const;
+
+  /// \brief Return true if \p MA represents the live on entry value
+  ///
+  /// Loads and stores from pointer arguments and other global values may be
+  /// defined by memory operations that do not occur in the current function, so
+  /// they may be live on entry to the function. MemorySSA represents such
+  /// memory state by the live on entry definition, which is guaranteed to
+  /// occur before any other memory access in the function.
+  inline bool isLiveOnEntryDef(const MemoryAccess *MA) const {
+    return MA == LiveOnEntryDef;
+  }
+
+  inline const MemoryAccess *getLiveOnEntryDef() const {
+    assert(LiveOnEntryDef && "Live on entry def not initialized yet");
+    return LiveOnEntryDef;
+  }
+
+  typedef ilist<MemoryAccess> AccessListType;
+
+  /// \brief Return the list of MemoryAccess's for a given basic block.
+  ///
+  /// This list is not modifiable by the user.
+  const AccessListType *getBlockAccesses(const BasicBlock *BB) {
+    return PerBlockAccesses[BB];
+  }
+
+  /// \brief Remove a MemoryAccess from MemorySSA, including updating all
+  /// definitions and uses.
+  void removeMemoryAccess(MemoryAccess *);
+
+  /// \brief Replace one MemoryAccess with another, including updating all
+  /// definitions and uses.
+  void replaceMemoryAccess(MemoryAccess *Replacee, MemoryAccess *Replacer);
+
+  enum InsertionPlace { Beginning, End };
+
+  /// \brief Replace a MemoryAccess with a new access, created based on
+  /// instruction \p Replacer - this does not perform generic SSA updates, so it
+  /// only works if the new access dominates the old access's uses.
+  ///
+  /// This version places the access at either the end or the beginning of \p
+  /// Replacer's block, depending on the value of \p Where.
+  ///
+  /// \returns the new access that was created.
+  MemoryAccess *replaceMemoryAccessWithNewAccess(MemoryAccess *Replacee,
+                                                 Instruction *Replacer,
+                                                 enum InsertionPlace Where);
+  /// \brief Replace a MemoryAccess with a new access, created based on
+  /// instruction \p Replacer - this does not perform generic SSA updates, so it
+  /// only works if the new access dominates the old access's uses.
+  ///
+  /// This version places the access before the place that \p Where points to.
+  MemoryAccess *
+  replaceMemoryAccessWithNewAccess(MemoryAccess *Replacee,
+                                   Instruction *Replacer,
+                                   const AccessListType::iterator &Where);
+
+protected:
+  // Used by Memory SSA annotator, dumpers, and wrapper pass
+  friend class MemorySSAAnnotatedWriter;
+  friend class MemorySSAPrinterPass;
+  void verifyDefUses(Function &F);
+  void verifyDomination(Function &F);
+
+private:
+  void verifyUseInDefs(MemoryAccess *, MemoryAccess *);
+  typedef DenseMap AccessMap;
+
+  void
+  determineInsertionPoint(AccessMap &BlockAccesses,
+                          const SmallPtrSetImpl &DefiningBlocks);
+  void computeDomLevels(DenseMap &DomLevels);
+  void markUnreachableAsLiveOnEntry(AccessMap &BlockAccesses, BasicBlock *BB);
+  bool dominatesUse(MemoryAccess *, MemoryAccess *) const;
+  void removeFromLookups(MemoryAccess *);
+  struct RenamePassData {
+    BasicBlock *BB;
+    BasicBlock *Pred;
+    MemoryAccess *MA;
+
+    RenamePassData() : BB(nullptr), Pred(nullptr), MA(nullptr) {}
+
+    RenamePassData(BasicBlock *B, BasicBlock *P, MemoryAccess *M)
+        : BB(B),
Pred(P), MA(M) {} + void swap(RenamePassData &RHS) { + std::swap(BB, RHS.BB); + std::swap(Pred, RHS.Pred); + std::swap(MA, RHS.MA); + } + }; + + void renamePass(BasicBlock *BB, BasicBlock *Pred, MemoryAccess *IncomingVal, + AccessMap &BlockAccesses, + std::vector &Worklist, + SmallPtrSet &Visited, MemorySSAWalker *); + AliasAnalysis *AA; + DominatorTree *DT; + Function &F; + + // Memory SSA mappings + DenseMap InstructionToMemoryAccess; + AccessMap PerBlockAccesses; + MemoryAccess *LiveOnEntryDef; + + // Memory SSA building info + unsigned int nextID; + bool builtAlready; + MemorySSAWalker *Walker; +}; + +// This pass does eager building and then printing of MemorySSA. It is used by +// the tests to be able to build, dump, and verify Memory SSA. + +class MemorySSAPrinterPass : public FunctionPass { +public: + MemorySSAPrinterPass(); + + static char ID; + bool doInitialization(Module &M) override; + bool runOnFunction(Function &) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *M) const override; + static void registerOptions(); + MemorySSA &getMSSA() { return *MSSA; } + +private: + bool VerifyMemorySSA; + + MemorySSA *MSSA; + MemorySSAWalker *Walker; + Function *F; +}; + +class MemorySSALazy : public FunctionPass { +public: + MemorySSALazy(); + + static char ID; + bool runOnFunction(Function &) override; + void releaseMemory() override; + MemorySSA &getMSSA() { return *MSSA; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + MemorySSA *MSSA; +}; + +/// \brief This is the generic walker interface for walkers of MemorySSA. +/// Walkers are used to be able to further disambiguate the def-use chains +/// MemorySSA gives you. 
+class MemorySSAWalker {
+public:
+  MemorySSAWalker(MemorySSA *);
+  virtual ~MemorySSAWalker() {}
+
+  typedef SmallVector MemoryAccessSet;
+  /// \brief Given a memory defining/using/clobbering instruction, calling this
+  /// will give you the nearest dominating clobbering MemoryAccess (by skipping
+  /// non-aliasing def links).
+  ///
+  /// Note that this will return a single access, and it must dominate the
+  /// Instruction, so if an argument of a MemoryPhi node clobbers the
+  /// instruction, it will return the MemoryPhi node, *not* the argument.
+  virtual MemoryAccess *getClobberingMemoryAccess(const Instruction *) = 0;
+
+  // Given a memory defining/using/clobbering instruction, calling this will
+  // give you the set of nearest clobbering accesses. They are not guaranteed
+  // to dominate an instruction. The main difference between this and the above
+  // is that if a phi's argument clobbers the instruction, the set will include
+  // the nearest clobbering access of all of phi arguments, instead of the phi.
+  // virtual MemoryAccessSet getClobberingMemoryAccesses(const Instruction *) =
+  // 0;
+
+protected:
+  MemorySSA *MSSA;
+};
+
+/// \brief A MemorySSAWalker that does no alias queries, or anything else. It
+/// simply returns the links as they were constructed by the builder.
+class DoNothingMemorySSAWalker final : public MemorySSAWalker {
+public:
+  MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
+};
+
+/// \brief A MemorySSAWalker that does real AA walks and caching of lookups.
+class CachingMemorySSAWalker final : public MemorySSAWalker {
+public:
+  CachingMemorySSAWalker(MemorySSA *, AliasAnalysis *);
+  virtual ~CachingMemorySSAWalker();
+  MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
+
+protected:
+  struct UpwardsMemoryQuery;
+  MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &);
+  void doCacheInsert(const MemoryAccess *, MemoryAccess *,
+                     const UpwardsMemoryQuery &);
+
+private:
+  std::pair
+  getClobberingMemoryAccess(MemoryPhi *Phi, struct UpwardsMemoryQuery &);
+  std::pair
+  getClobberingMemoryAccess(MemoryAccess *, struct UpwardsMemoryQuery &);
+
+  DenseMap,
+           MemoryAccess *> CachedUpwardsClobberingAccess;
+  DenseMap CachedUpwardsClobberingCall;
+  AliasAnalysis *AA;
+};
+}
+
+#endif
Index: lib/Analysis/AliasAnalysis.cpp
===================================================================
--- lib/Analysis/AliasAnalysis.cpp
+++ lib/Analysis/AliasAnalysis.cpp
@@ -82,6 +82,24 @@
   AA->addEscapingUse(U);
 }
 
+bool AliasAnalysis::instructionClobbersCall(Instruction *I,
+                                            ImmutableCallSite Call) {
+  // We may have two calls
+  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
+    // Check if the two calls modify the same memory
+    if (getModRefInfo(Call, I) & AliasAnalysis::Mod)
+      return true;
+  } else {
+    // Otherwise, check if the call modifies or references the
+    // location this memory access defines. The best we can say
+    // is that if the call references what this instruction
+    // defines, it must be clobbered by this location.
+    const AliasAnalysis::Location DefLoc = getLocation(I);
+    if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef)
+      return true;
+  }
+  return false;
+}
 
 AliasAnalysis::ModRefResult
 AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
@@ -330,7 +348,7 @@
 
   // If the load address doesn't alias the given address, it doesn't read
   // or write the specified memory.
-  if (!alias(getLocation(L), Loc))
+  if (Loc.Ptr && !alias(getLocation(L), Loc))
     return NoModRef;
 
   // Otherwise, a load just reads.
@@ -343,16 +361,19 @@ if (!S->isUnordered()) return ModRef; - // If the store address cannot alias the pointer in question, then the - // specified memory cannot be modified by the store. - if (!alias(getLocation(S), Loc)) - return NoModRef; - - // If the pointer is a pointer to constant memory, then it could not have been - // modified by this store. - if (pointsToConstantMemory(Loc)) - return NoModRef; + if (Loc.Ptr) { + // If the store address cannot alias the pointer in question, then the + // specified memory cannot be modified by the store. + if (!alias(getLocation(S), Loc)) + return NoModRef; + + // If the pointer is a pointer to constant memory, then it could not have + // been modified by this store. + if (pointsToConstantMemory(Loc)) + return NoModRef; + } + // Otherwise, a store just writes. return Mod; } Index: lib/Analysis/Analysis.cpp =================================================================== --- lib/Analysis/Analysis.cpp +++ lib/Analysis/Analysis.cpp @@ -56,6 +56,8 @@ initializeMemDepPrinterPass(Registry); initializeMemDerefPrinterPass(Registry); initializeMemoryDependenceAnalysisPass(Registry); + initializeMemorySSALazyPass(Registry); + initializeMemorySSAPrinterPassPass(Registry); initializeModuleDebugInfoPrinterPass(Registry); initializePostDominatorTreePass(Registry); initializeRegionInfoPassPass(Registry); Index: lib/IR/AsmWriter.cpp =================================================================== --- lib/IR/AsmWriter.cpp +++ lib/IR/AsmWriter.cpp @@ -3061,6 +3061,13 @@ // External Interface declarations //===----------------------------------------------------------------------===// +void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { + SlotTracker SlotTable(this->getParent()); + formatted_raw_ostream OS(ROS); + AssemblyWriter W(OS, SlotTable, this->getParent(), AAW); + W.printFunction(this); +} + void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const { SlotTracker SlotTable(this); 
formatted_raw_ostream OS(ROS); Index: lib/Transforms/Utils/CMakeLists.txt =================================================================== --- lib/Transforms/Utils/CMakeLists.txt +++ lib/Transforms/Utils/CMakeLists.txt @@ -24,6 +24,7 @@ LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + MemorySSA.cpp MetaRenamer.cpp ModuleUtils.cpp PromoteMemoryToRegister.cpp Index: lib/Transforms/Utils/MemorySSA.cpp =================================================================== --- /dev/null +++ lib/Transforms/Utils/MemorySSA.cpp @@ -0,0 +1,1045 @@ +//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSA class. +// +//===----------------------------------------------------------------===// +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include +#include + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups"); +STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits"); + 
+INITIALIZE_PASS_WITH_OPTIONS_BEGIN(MemorySSAPrinterPass, "print-memoryssa", + "Memory SSA", true, true) +INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(MemorySSAPrinterPass, "print-memoryssa", "Memory SSA", true, + true) + +INITIALIZE_PASS(MemorySSALazy, "memoryssalazy", "Memory SSA", true, true) + +namespace llvm { + +/// \brief An annotator class to print Memory SSA information in comments. +class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { + friend class MemorySSA; + const MemorySSA *MSSA; + +public: + MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + MemoryAccess *MA = MSSA->getMemoryAccess(BB); + if (MA) + OS << "; " << *MA << "\n"; + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + if (MA) + OS << "; " << *MA << "\n"; + } +}; +} + +/// \brief This is the same algorithm as PromoteMemoryToRegister's phi +/// placement algorithm. It is a linear time phi placement algorithm. +void MemorySSA::determineInsertionPoint( + AccessMap &BlockAccesses, const SmallPtrSetImpl &DefBlocks) { + // Compute dominator levels and BB numbers + DenseMap DomLevels; + computeDomLevels(DomLevels); + + // Use a priority queue keyed on dominator tree level so that inserted nodes + // are handled from the bottom of the dominator tree upwards. 
+ typedef std::pair DomTreeNodePair; + typedef std::priority_queue, + less_second> IDFPriorityQueue; + IDFPriorityQueue PQ; + + for (BasicBlock *BB : DefBlocks) { + if (DomTreeNode *Node = DT->getNode(BB)) + PQ.push(std::make_pair(Node, DomLevels[Node])); + } + + SmallVector DFBlocks; + SmallPtrSet Visited; + SmallVector Worklist; + while (!PQ.empty()) { + DomTreeNodePair RootPair = PQ.top(); + PQ.pop(); + DomTreeNode *Root = RootPair.first; + unsigned RootLevel = RootPair.second; + + // Walk all dominator tree children of Root, inspecting their CFG edges with + // targets elsewhere on the dominator tree. Only targets whose level is at + // most Root's level are added to the iterated dominance frontier of the + // definition set. + + Worklist.clear(); + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + BasicBlock *BB = Node->getBlock(); + + for (auto S : successors(BB)) { + DomTreeNode *SuccNode = DT->getNode(S); + + // Quickly skip all CFG edges that are also dominator tree edges instead + // of catching them below. + if (SuccNode->getIDom() == Node) + continue; + + unsigned SuccLevel = DomLevels[SuccNode]; + if (SuccLevel > RootLevel) + continue; + + if (!Visited.insert(SuccNode).second) + continue; + + BasicBlock *SuccBB = SuccNode->getBlock(); + + DFBlocks.push_back(SuccBB); + if (!DefBlocks.count(SuccBB)) + PQ.push(std::make_pair(SuccNode, SuccLevel)); + } + + for (auto &C : *Node) + if (!Visited.count(C)) + Worklist.push_back(C); + } + } + + for (auto &BB : DFBlocks) { + // Insert phi node + auto Accesses = BlockAccesses.lookup(BB); + if (!Accesses) { + Accesses = new ilist; + BlockAccesses.insert(std::make_pair(BB, Accesses)); + } + MemoryPhi *Phi = new MemoryPhi( + BB, std::distance(pred_begin(BB), pred_end(BB)), nextID++); + InstructionToMemoryAccess.insert(std::make_pair(BB, Phi)); + // Phi goes first + Accesses->push_front(Phi); + } +} + +/// \brief This is the standard SSA renaming algorithm. 
+void MemorySSA::renamePass(BasicBlock *BB, BasicBlock *Pred, + MemoryAccess *IncomingVal, AccessMap &BlockAccesses, + std::vector &Worklist, + SmallPtrSet &Visited, + MemorySSAWalker *Walker) { +NextIteration: + const auto Accesses = BlockAccesses.lookup(BB); + + // First rename the phi nodes + if (Accesses && isa(Accesses->front())) { + MemoryPhi *Phi = cast(&Accesses->front()); + unsigned NumEdges = std::count(succ_begin(Pred), succ_end(Pred), BB); + assert(NumEdges && "Must be at least one edge from Pred to BB!"); + for (unsigned i = 0; i != NumEdges; ++i) + Phi->addIncoming(IncomingVal, Pred); + + IncomingVal = Phi; + } + + // Don't revisit blocks. + if (!Visited.insert(BB).second) + return; + + // Skip if the list is empty, but we still have to pass thru the + // incoming value info/etc to successors + if (Accesses) + for (auto &L : *Accesses) { + if (isa(L)) + continue; + + if (MemoryUse *MU = dyn_cast(&L)) { + MU->setDefiningAccess(IncomingVal); + auto RealVal = Walker->getClobberingMemoryAccess(MU->getMemoryInst()); + MU->setDefiningAccess(RealVal); + } else if (MemoryDef *MD = dyn_cast(&L)) { + // We can't legally optimize defs, because we only allow single memory + // phis/uses on operations, and if we optimize these, we can end up with + // multiple reaching defs. Uses do not have this problem, since they do + // not produce a value + MD->setDefiningAccess(IncomingVal); + IncomingVal = MD; + } + } + // 'Recurse' to our successors. + succ_iterator I = succ_begin(BB), E = succ_end(BB); + if (I == E) + return; + + // Keep track of the successors so we don't visit the same successor twice + SmallPtrSet VisitedSuccs; + + // Handle the first successor without using the worklist. 
+ VisitedSuccs.insert(*I); + Pred = BB; + BB = *I; + ++I; + + for (; I != E; ++I) + if (VisitedSuccs.insert(*I).second) + Worklist.push_back(RenamePassData(*I, Pred, IncomingVal)); + goto NextIteration; +} + +/// \brief Compute dominator levels, used by the phi insertion algorithm above. +void MemorySSA::computeDomLevels(DenseMap &DomLevels) { + SmallVector Worklist; + + DomTreeNode *Root = DT->getRootNode(); + DomLevels[Root] = 0; + Worklist.push_back(Root); + + while (!Worklist.empty()) { + DomTreeNode *Node = Worklist.pop_back_val(); + unsigned ChildLevel = DomLevels[Node] + 1; + for (auto CI = Node->begin(), CE = Node->end(); CI != CE; ++CI) { + DomLevels[*CI] = ChildLevel; + Worklist.push_back(*CI); + } + } +} + +/// \brief This handles unreachable block acccesses by deleting phi nodes in +/// unreachable blocks, and marking all other unreachable MemoryAccess's as +/// being uses of the live on entry definition. +void MemorySSA::markUnreachableAsLiveOnEntry(AccessMap &BlockAccesses, + BasicBlock *BB) { + assert(!DT->isReachableFromEntry(BB) && + "Reachable block found while handling unreachable blocks"); + + auto Accesses = BlockAccesses.lookup(BB); + if (!Accesses) + return; + + for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { + auto Next = std::next(AI); + // If we have a phi, just remove it. We are going to replace all + // users with live on entry. 
+ if (isa(&*AI)) { + Accesses->erase(AI); + } else { + AI->setDefiningAccess(LiveOnEntryDef); + } + AI = Next; + } +} + +MemorySSA::MemorySSA(Function &Func) + : F(Func), LiveOnEntryDef(nullptr), nextID(0), builtAlready(false), + Walker(nullptr) {} + +MemorySSA::~MemorySSA() { + InstructionToMemoryAccess.clear(); + // This will destroy and delete all the accesses + for (auto &BA : PerBlockAccesses) + delete BA.second; + PerBlockAccesses.clear(); + delete LiveOnEntryDef; +} + +MemorySSAWalker *MemorySSA::buildMemorySSA(AliasAnalysis *AA, + DominatorTree *DT) { + if (builtAlready) + return Walker; + else + Walker = new CachingMemorySSAWalker(this, AA); + + this->AA = AA; + this->DT = DT; + + // We create an access to represent "live on entry", for things like + // arguments or users of globals. We do not actually insert it in to + // the IR. + BasicBlock &StartingPoint = F.getEntryBlock(); + LiveOnEntryDef = new MemoryDef(nullptr, nullptr, &StartingPoint, nextID++); + + // We temporarily maintain lists of memory accesses per-block, + // trading time for memory. We could just look up the memory access + // for every possible instruction in the stream. Instead, we build + // lists, and then throw it out once the use-def form is built. 
+ SmallPtrSet DefiningBlocks; + + for (auto &B : F) { + AccessListType *Accesses = nullptr; + for (auto &I : B) { + bool use = false; + bool def = false; + AliasAnalysis::ModRefResult ModRef = AA->getModRefInfo(&I); + if (ModRef & AliasAnalysis::Mod) + def = true; + if (ModRef & AliasAnalysis::Ref) + use = true; + + // Defs are already uses, so use && def == def + if (use && !def) { + MemoryUse *MU = new MemoryUse(nullptr, &I, &B); + InstructionToMemoryAccess.insert(std::make_pair(&I, MU)); + if (!Accesses) { + Accesses = new AccessListType(); + PerBlockAccesses.insert(std::make_pair(&B, Accesses)); + } + Accesses->push_back(MU); + } + if (def) { + MemoryDef *MD = new MemoryDef(nullptr, &I, &B, nextID++); + InstructionToMemoryAccess.insert(std::make_pair(&I, MD)); + DefiningBlocks.insert(&B); + if (!Accesses) { + Accesses = new AccessListType(); + PerBlockAccesses.insert(std::make_pair(&B, Accesses)); + } + Accesses->push_back(MD); + } + } + } + // Determine where our PHI's should go + determineInsertionPoint(PerBlockAccesses, DefiningBlocks); + + // Now do regular SSA renaming + SmallPtrSet Visited; + + std::vector RenamePassWorklist; + RenamePassWorklist.push_back({F.begin(), nullptr, LiveOnEntryDef}); + do { + RenamePassData RPD; + RPD.swap(RenamePassWorklist.back()); + RenamePassWorklist.pop_back(); + renamePass(RPD.BB, RPD.Pred, RPD.MA, PerBlockAccesses, RenamePassWorklist, + Visited, Walker); + } while (!RenamePassWorklist.empty()); + + // At this point, we may have unreachable blocks with unreachable accesses + // Given any uses in unreachable blocks the live on entry definition + if (Visited.size() != F.size()) { + for (auto &B : F) + if (!Visited.count(&B)) + markUnreachableAsLiveOnEntry(PerBlockAccesses, &B); + } + + builtAlready = true; + return Walker; +} + +static MemoryAccess *getDefiningAccess(MemoryAccess *MA) { + if (isa(MA) || isa(MA)) + return MA->getDefiningAccess(); + else + return nullptr; +} +static void setDefiningAccess(MemoryAccess *Of, 
MemoryAccess *To) { + if (isa(Of) || isa(Of)) + Of->setDefiningAccess(To); +} + +/// \brief Properly remove \p MA from all of MemorySSA's lookup tables. +/// +/// Because of the way the intrusive list and use lists work, it is important to +/// do removal in the right order. +void MemorySSA::removeFromLookups(MemoryAccess *MA) { + assert(MA->use_empty() && + "Trying to remove memory access that still has uses"); + setDefiningAccess(MA, nullptr); + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + Instruction *MemoryInst = MA->getMemoryInst(); + if (MemoryInst) + InstructionToMemoryAccess.erase(MemoryInst); + auto *Accesses = PerBlockAccesses[MA->getBlock()]; + Accesses->erase(MA); + if (Accesses->empty()) { + delete Accesses; + PerBlockAccesses.erase(MA->getBlock()); + } +} + +MemoryAccess *MemorySSA::replaceMemoryAccessWithNewAccess( + MemoryAccess *Replacee, Instruction *Replacer, enum InsertionPlace Where) { + BasicBlock *ReplacerBlock = Replacer->getParent(); + + AccessListType *Accesses = nullptr; + auto Result = PerBlockAccesses.insert(std::make_pair(ReplacerBlock, nullptr)); + + if (!Result.second) { + Accesses = Result.first->second; + } else { + Accesses = new AccessListType(); + Result.first->second = Accesses; + } + if (Where == Beginning) + return replaceMemoryAccessWithNewAccess(Replacee, Replacer, + Accesses->begin()); + else + return replaceMemoryAccessWithNewAccess(Replacee, Replacer, + Accesses->end()); +} + +MemoryAccess *MemorySSA::replaceMemoryAccessWithNewAccess( + MemoryAccess *Replacee, Instruction *Replacer, + const AccessListType::iterator &Where) { + + // There is no easy way to assert that you are replacing it with a memory + // access, and this call will assert it for us + AliasAnalysis::ModRefResult ModRef = AA->getModRefInfo(Replacer); + bool def = false, use = false; + if (ModRef & AliasAnalysis::Mod) + def = true; + if (ModRef & AliasAnalysis::Ref) + use = true; + + assert((def 
|| use) && + "Trying to replace a memory access with a non-memory instruction"); + + BasicBlock *ReplacerBlock = Replacer->getParent(); + MemoryAccess *MA = nullptr; + MemoryAccess *DefiningAccess = getDefiningAccess(Replacee); + + // Handle the case we are replacing a phi node, in which case, we don't kill + // the phi node + if (DefiningAccess == nullptr) { + assert(isa(Replacee) && + "Should have been a phi node if we can't get a defining access"); + assert(DT->dominates(Replacee->getBlock(), ReplacerBlock) && + "Need to reuse PHI for defining access, but it will not dominate " + "replacing instruction"); + DefiningAccess = Replacee; + } + + if (def) { + MemoryDef *MD = + new MemoryDef(DefiningAccess, Replacer, ReplacerBlock, nextID++); + MA = MD; + } else if (use) { + MemoryUse *MU = new MemoryUse(DefiningAccess, Replacer, ReplacerBlock); + MA = MU; + } + + AccessListType *Accesses = PerBlockAccesses.lookup(ReplacerBlock); + assert(Accesses && "Can't use iterator insertion for brand new block"); + Accesses->insert(Where, MA); + InstructionToMemoryAccess.insert({Replacer, MA}); + replaceMemoryAccess(Replacee, MA); + return MA; +} + +/// \brief Returns true if \p Replacer dominates \p Replacee . +bool MemorySSA::dominatesUse(MemoryAccess *Replacer, + MemoryAccess *Replacee) const { + if (isa(Replacee) || isa(Replacee)) + return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); + MemoryPhi *MP = cast(Replacee); + for (const auto &Arg : MP->args()) + if (Arg.second == Replacee) + if (!DT->dominates(Replacer->getBlock(), Arg.first)) + return false; + return true; +} + +void MemorySSA::replaceMemoryAccess(MemoryAccess *Replacee, + MemoryAccess *Replacer) { + + // If we don't replace all phi node entries, we can't remove it. + bool replacedAllPhiEntries = true; + // If we are replacing a phi node, we may still actually use it, since we + // may now be defined in terms of it. 
+ bool usedByReplacee = getDefiningAccess(Replacer) == Replacee; + + // Just to note: We can replace the live on entry def, unlike removing it, so + // we don't assert here, but it's almost always a bug, unless you are + // inserting a load/store in a block that dominates the rest of the program. + for (auto U : Replacee->uses()) { + if (U == Replacer) + continue; + assert(dominatesUse(Replacer, Replacee) && + "Definitions will not dominate uses in replacement!"); + if (MemoryPhi *MP = dyn_cast(U)) { + for (unsigned i = 0, e = MP->getNumIncomingValues(); i != e; ++i) { + if (MP->getIncomingValue(i) == Replacee) + MP->setIncomingValue(i, Replacer); + else + replacedAllPhiEntries = false; + } + } else { + U->setDefiningAccess(Replacer); + } + } + // Kill our dead replacee if it's really dead + if (replacedAllPhiEntries && !usedByReplacee) { + removeFromLookups(Replacee); + } +} + +#ifndef NDEBUG +/// \brief Returns true if a phi is defined by the same value on all edges +static bool onlySingleValue(MemoryPhi *MP) { + MemoryAccess *MA = nullptr; + + for (const auto &Arg : MP->args()) { + if (!MA) + MA = Arg.second; + else if (MA != Arg.second) + return false; + } + return true; +} +#endif + +void MemorySSA::removeMemoryAccess(MemoryAccess *MA) { + assert(MA != LiveOnEntryDef && "Trying to remove the live on entry def"); + // We can only delete phi nodes if they are use empty + if (MemoryPhi *MP = dyn_cast(MA)) { + // This code only used in assert builds + (void)MP; + assert(MP->use_empty() && "We can't delete memory phis that still have " + "uses, we don't know where the uses should " + "repoint to!"); + assert(onlySingleValue(MP) && "This phi still points to multiple values, " + "which means it is still needed"); + } else { + MemoryAccess *DefiningAccess = getDefiningAccess(MA); + // Re-point the uses at our defining access + for (auto U : MA->uses()) { + assert(dominatesUse(DefiningAccess, U) && + "Definitions will not dominate uses in removal!"); + if (MemoryPhi 
*MP = dyn_cast(U)) { + for (unsigned i = 0, e = MP->getNumIncomingValues(); i != e; ++i) { + if (MP->getIncomingValue(i) == MA) + MP->setIncomingValue(i, DefiningAccess); + } + } else { + U->setDefiningAccess(DefiningAccess); + } + } + } + + // The call below to erase will destroy MA, so we can't change the order we + // are doing things here + removeFromLookups(MA); +} + +void MemorySSA::print(raw_ostream &OS) const { + MemorySSAAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void MemorySSA::dump(Function &F) { + MemorySSAAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +/// \brief Verify the domination properties of MemorySSA by checking that each +/// definition dominates all of its uses. +void MemorySSA::verifyDomination(Function &F) { + for (auto &B : F) { + // Phi nodes are attached to basic blocks + MemoryAccess *MA = getMemoryAccess(&B); + if (MA) { + MemoryPhi *MP = cast(MA); + for (const auto &U : MP->uses()) { + BasicBlock *UseBlock; + // Phi operands are used on edges, we simulate the right domination by + // acting as if the use occurred at the end of the predecessor block. + if (MemoryPhi *P = dyn_cast(U)) { + for (const auto &Arg : P->args()) { + if (Arg.second == MP) { + UseBlock = Arg.first; + break; + } + } + } else { + UseBlock = U->getBlock(); + } + assert(DT->dominates(MP->getBlock(), UseBlock) && + "Memory PHI does not dominate it's uses"); + } + } + for (auto &I : B) { + MA = getMemoryAccess(&I); + if (MA) { + if (MemoryDef *MD = dyn_cast(MA)) + for (const auto &U : MD->uses()) { + BasicBlock *UseBlock; + // Things are allowed to flow to phi nodes over their predecessor + // edge. 
+ if (MemoryPhi *P = dyn_cast(U)) { + for (const auto &Arg : P->args()) { + if (Arg.second == MD) { + UseBlock = Arg.first; + break; + } + } + } else { + UseBlock = U->getBlock(); + } + assert(DT->dominates(MD->getBlock(), UseBlock) && + "Memory Def does not dominate it's uses"); + } + } + } + } +} + +/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use +/// appears in the use list of \p Def. +void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) { + // The live on entry use may cause us to get a NULL def here + if (Def == nullptr) { + assert(isLiveOnEntryDef(Use) && + "Null def but use not point to live on entry def"); + return; + } + assert(Def->findUse(Use) && "Did not find use in def's use list"); +} + +/// \brief Verify the immediate use information, by walking all the memory +/// accesses and verifying that, for each use, it appears in the +/// appropriate def's use list +void MemorySSA::verifyDefUses(Function &F) { + for (auto &B : F) { + // Phi nodes are attached to basic blocks + MemoryAccess *MA = getMemoryAccess(&B); + if (MA) { + assert(isa(MA) && + "Something other than phi node on basic block"); + MemoryPhi *MP = cast(MA); + for (unsigned i = 0, e = MP->getNumIncomingValues(); i != e; ++i) + verifyUseInDefs(MP->getIncomingValue(i), MP); + } + for (auto &I : B) { + MA = getMemoryAccess(&I); + if (MA) { + if (MemoryPhi *MP = dyn_cast(MA)) { + for (unsigned i = 0, e = MP->getNumIncomingValues(); i != e; ++i) + verifyUseInDefs(MP->getIncomingValue(i), MP); + } else { + verifyUseInDefs(MA->getDefiningAccess(), MA); + } + } + } + } +} + +MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const { + return InstructionToMemoryAccess.lookup(I); +} + +void MemoryDef::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + + OS << getID() << " = " + << "MemoryDef("; + if (UO && UO->getID() != 0) + OS << UO->getID(); + else + OS << "liveOnEntry"; + OS << ")"; +} + +void MemoryPhi::print(raw_ostream 
&OS) const { + OS << getID() << " = " + << "MemoryPhi("; + for (unsigned int i = 0, e = getNumIncomingValues(); i != e; ++i) { + BasicBlock *BB = getIncomingBlock(i); + MemoryAccess *MA = getIncomingValue(i); + OS << "{"; + if (BB->hasName()) + OS << BB->getName(); + else + BB->printAsOperand(OS, false); + OS << ","; + assert((isa(MA) || isa(MA)) && + "Phi node should have referred to def or another phi"); + if (MA->getID() != 0) + OS << MA->getID(); + else + OS << "liveOnEntry"; + OS << "}"; + if (i + 1 < e) + OS << ","; + } + OS << ")"; +} + +MemoryAccess::~MemoryAccess() {} + +void MemoryUse::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + OS << "MemoryUse("; + if (UO && UO->getID() != 0) + OS << UO->getID(); + else + OS << "liveOnEntry"; + OS << ")"; +} + +void MemoryAccess::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +char MemorySSAPrinterPass::ID = 0; + +MemorySSAPrinterPass::MemorySSAPrinterPass() : FunctionPass(ID) { + initializeMemorySSAPrinterPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAPrinterPass::releaseMemory() { + delete MSSA; + delete Walker; +} + +void MemorySSAPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequiredTransitive(); + AU.addRequired(); +} + +bool MemorySSAPrinterPass::doInitialization(Module &M) { + + VerifyMemorySSA = + M.getContext() + .template getOption(); + return false; +} + +void MemorySSAPrinterPass::registerOptions() { + OptionRegistry::registerOption( + "verify-memoryssa", "Run the Memory SSA verifier", false); +} + +void MemorySSAPrinterPass::print(raw_ostream &OS, const Module *M) const { + MSSA->dump(*F); +} + +bool MemorySSAPrinterPass::runOnFunction(Function &F) { + this->F = &F; + MSSA = new MemorySSA(F); + AliasAnalysis *AA = &getAnalysis(); + DominatorTree *DT = &getAnalysis().getDomTree(); + Walker = MSSA->buildMemorySSA(AA, DT); + + if (VerifyMemorySSA) { + MSSA->verifyDefUses(F); + MSSA->verifyDomination(F); + } + + 
return false; +} + +char MemorySSALazy::ID = 0; + +MemorySSALazy::MemorySSALazy() : FunctionPass(ID) { + initializeMemorySSALazyPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSALazy::releaseMemory() { delete MSSA; } + +bool MemorySSALazy::runOnFunction(Function &F) { + MSSA = new MemorySSA(F); + return false; +} + +MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} + +CachingMemorySSAWalker::CachingMemorySSAWalker(MemorySSA *M, AliasAnalysis *A) + : MemorySSAWalker(M), AA(A) {} + +CachingMemorySSAWalker::~CachingMemorySSAWalker() { + CachedUpwardsClobberingAccess.clear(); + CachedUpwardsClobberingCall.clear(); +} + +struct CachingMemorySSAWalker::UpwardsMemoryQuery { + // True if our original query started off as a call + bool isCall; + // The pointer location we are going to query about. This will be + // empty if isCall is true + AliasAnalysis::Location Loc; + // This is the instruction we were querying about. + const Instruction *Inst; + // Set of visited Instructions for this query + SmallPtrSet Visited; + // Set of visited call accesses for this query This is separated out because + // you can always cache and lookup the result of call queries (IE when + // isCall == true) for every call in the chain. The calls have no AA + // location associated with them with them, and thus, no context dependence. 
+ SmallPtrSet VisitedCalls; +}; + +void CachingMemorySSAWalker::doCacheInsert(const MemoryAccess *M, + MemoryAccess *Result, + const UpwardsMemoryQuery &Q) { + if (Q.isCall) + CachedUpwardsClobberingCall.insert(std::make_pair(M, Result)); + else + CachedUpwardsClobberingAccess.insert( + std::make_pair(std::make_pair(M, Q.Loc), Result)); +} + +MemoryAccess * +CachingMemorySSAWalker::doCacheLookup(const MemoryAccess *M, + const UpwardsMemoryQuery &Q) { + ++NumClobberCacheLookups; + MemoryAccess *Result; + + if (Q.isCall) + Result = CachedUpwardsClobberingCall.lookup(M); + else + Result = CachedUpwardsClobberingAccess.lookup(std::make_pair(M, Q.Loc)); + + if (Result) { + ++NumClobberCacheHits; + return Result; + } + return nullptr; +} + +/// \brief Walk the use-def chains starting at \p P and find +/// the MemoryAccess that actually clobbers Loc. +/// +/// \returns a pair of clobbering memory access and whether we hit a cyclic phi +/// node. +std::pair +CachingMemorySSAWalker::getClobberingMemoryAccess( + MemoryPhi *P, struct UpwardsMemoryQuery &Q) { + + bool HitVisited = false; + + ++NumClobberCacheLookups; + auto CacheResult = doCacheLookup(P, Q); + if (CacheResult) + return std::make_pair(CacheResult, false); + + // The algorithm here is fairly simple. The goal is to prove that + // the phi node doesn't matter for this alias location, and to get + // to whatever Access occurs before the *split* point that caused + // the phi node. + // There are only two cases we can walk through: + // 1. One argument dominates the other, and the other's argument + // defining memory access is non-aliasing with our location. + // 2. All of the arguments are non-aliasing with our location, and + // eventually lead back to the same defining memory access. 
+ MemoryAccess *Result = nullptr; + + // Don't try to walk past an incomplete phi node during construction + if (P->getNumIncomingValues() != P->getNumPreds()) + return std::make_pair(P, false); + + // If we already got here once, and didn't get to an answer (if we + // did, it would have been cached below), we must be stuck in + // mutually recursive phi nodes. In that case, the correct answer + // is "we can ignore the phi node if all the other arguments turn + // out okay" (since it cycles between itself and the other + // arguments). We return true here, and are careful to make sure we + // only pass through "true" when we are giving results + // for the cycle itself. + if (!Q.Visited.insert(P).second) + return std::make_pair(P, true); + + // Look through 1 argument phi nodes + if (P->getNumIncomingValues() == 1) { + auto SingleResult = getClobberingMemoryAccess(P->getIncomingValue(0), Q); + + HitVisited = SingleResult.second; + Result = SingleResult.first; + } else { + MemoryAccess *TargetResult = nullptr; + + // This is true if we hit ourselves from every argument + bool AllVisited = true; + for (unsigned i = 0; i < P->getNumIncomingValues(); ++i) { + MemoryAccess *Arg = P->getIncomingValue(i); + auto ArgResult = getClobberingMemoryAccess(Arg, Q); + if (!ArgResult.second) { + AllVisited = false; + // Fill in target result we are looking for if we haven't so far + // Otherwise check the argument is equal to the last one + if (!TargetResult) { + TargetResult = ArgResult.first; + } else if (TargetResult != ArgResult.first) { + Result = P; + HitVisited = false; + break; + } + } + } + // See if we completed either with all visited, or with success + if (!Result && AllVisited) { + Result = P; + HitVisited = true; + } else if (!Result && TargetResult) { + Result = TargetResult; + HitVisited = false; + } + } + doCacheInsert(P, Result, Q); + + return std::make_pair(Result, HitVisited); +} + +/// \brief Return true if \p QueryInst could possibly have a Mod result with 
\p +/// DefInst. This is false, if for example, \p DefInst is a volatile load, and +/// \p QueryInst is not. +static bool possiblyAffectedBy(const Instruction *QueryInst, + const Instruction *DefInst) { + if (isa(DefInst) && isa(QueryInst)) { + const LoadInst *DefLI = cast(DefInst); + const LoadInst *QueryLI = cast(QueryInst); + // An unordered load can't be clobbered by an ordered one in any + // circumstances. + if (QueryLI->isUnordered() && !DefLI->isUnordered()) + return false; + } + return true; +} + +/// \brief Walk the use-def chains starting at \p MA and find +/// the MemoryAccess that actually clobbers Loc. +/// +/// \returns a pair of clobbering memory access and whether we hit a cyclic phi +/// node. +std::pair +CachingMemorySSAWalker::getClobberingMemoryAccess( + MemoryAccess *MA, struct UpwardsMemoryQuery &Q) { + MemoryAccess *CurrAccess = MA; + while (true) { + // Should be either a Memory Def or a Phi node at this point + if (MemoryPhi *P = dyn_cast(CurrAccess)) + return getClobberingMemoryAccess(P, Q); + else { + MemoryDef *MD = dyn_cast(CurrAccess); + assert(MD && "Use linked to something that is not a def"); + // If we hit the top, stop + if (MSSA->isLiveOnEntryDef(MD)) + return std::make_pair(CurrAccess, false); + Instruction *DefMemoryInst = MD->getMemoryInst(); + assert(DefMemoryInst && + "Defining instruction not actually an instruction"); + + // While we can do lookups, we can't sanely do inserts here unless we + // were to track every thing we saw along the way, since we don't + // know where we will stop. 
+ if (auto CacheResult = doCacheLookup(CurrAccess, Q)) + return std::make_pair(CacheResult, false); + if (!Q.isCall) { + // Okay, well, see if it's a volatile load vs non-volatile load + // situation or smoething similar) + if (possiblyAffectedBy(Q.Inst, DefMemoryInst)) + // Check whether our memory location is modified by this instruction + if (AA->getModRefInfo(DefMemoryInst, Q.Loc) & AliasAnalysis::Mod) + break; + } else { + // If this is a call, try then mark it for caching + if (ImmutableCallSite(DefMemoryInst)) { + Q.VisitedCalls.insert(MD); + } + if (AA->instructionClobbersCall(DefMemoryInst, Q.Inst)) + break; + } + } + MemoryAccess *NextAccess = cast(CurrAccess)->getDefiningAccess(); + // Walk from def to def + CurrAccess = NextAccess; + } + doCacheInsert(MA, CurrAccess, Q); + doCacheInsert(CurrAccess, CurrAccess, Q); + return std::make_pair(CurrAccess, false); +} + +MemoryAccess * +CachingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *StartingAccess = MSSA->getMemoryAccess(I); + struct UpwardsMemoryQuery Q; + + // First extract our location, then start walking until it is + // clobbered + // For calls, we store the call instruction we started with in + // Loc.Ptr + AliasAnalysis::Location Loc(I); + + // We can't sanely do anything with a FenceInst, they conservatively + // clobber all memory, and have no locations to get pointers from to + // try to disambiguate + if (isa(I)) { + return StartingAccess; + } else if (!isa(I) && !isa(I)) { + Q.isCall = false; + Q.Loc = AA->getLocation(I); + Q.Inst = I; + } else { + Q.isCall = true; + Q.Inst = I; + } + auto CacheResult = doCacheLookup(StartingAccess, Q); + if (CacheResult) + return CacheResult; + + // If we started with a heap use, walk to the def + StartingAccess = StartingAccess->getDefiningAccess(); + + SmallPtrSet Visited; + MemoryAccess *FinalAccess = + getClobberingMemoryAccess(StartingAccess, Q).first; + doCacheInsert(StartingAccess, FinalAccess, Q); + if (Q.isCall) { 
+ for (const auto &C : Q.VisitedCalls) + doCacheInsert(C, FinalAccess, Q); + } + + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *StartingAccess << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *FinalAccess << "\n"); + + return FinalAccess; +} + +MemoryAccess * +DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + if (isa(MA) || isa(MA)) + return MA; + return MA->getDefiningAccess(); +} Index: test/Transforms/Util/memoryssa-cyclicphi.ll =================================================================== --- /dev/null +++ test/Transforms/Util/memoryssa-cyclicphi.ll @@ -0,0 +1,39 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1| FileCheck %s + +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" + +%struct.hoge = type { i32, %struct.widget } +%struct.widget = type { i64 } +define hidden void @quux() align 2 { + %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* undef, i64 0, i32 1, i32 0 + %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* undef, i64 0, i32 1 + %tmp25 = bitcast %struct.widget* %tmp24 to i64** + br label %bb26 + +bb26: ; preds = %bb77, %0 +; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; 3 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; CHECK-NEXT: br i1 undef, label %bb68, label %bb77 + br i1 undef, label %bb68, label %bb77 + +bb68: ; preds = %bb26 +; CHECK: MemoryUse(3) +; MemoryUse(3) +; CHECK-NEXT: %tmp69 = load i64, i64* null, align 8 + %tmp69 = load i64, i64* null, align 8 +; CHECK: 1 = MemoryDef(3) +; 1 = MemoryDef(3) +; CHECK-NEXT: store i64 %tmp69, i64* %tmp, align 8 + store i64 %tmp69, i64* %tmp, align 8 + br label %bb77 + +bb77: ; preds = %bb68, %bb26 +; CHECK: 2 = MemoryPhi({bb68,1},{bb26,3}) +; 2 = MemoryPhi({bb68,1},{bb26,3}) +; CHECK: MemoryUse(2) +; MemoryUse(2) +; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8 + %tmp78 = 
load i64*, i64** %tmp25, align 8 + %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef + br label %bb26 +} Index: test/Transforms/Util/memoryssa-no-disconnected.ll =================================================================== --- /dev/null +++ test/Transforms/Util/memoryssa-no-disconnected.ll @@ -0,0 +1,48 @@ +; RUN: opt -basicaa -print-memoryssa -analyze -verify-memoryssa < %s 2>&1| FileCheck %s +; This test ensures we don't end up with multiple reaching defs for a single use/phi edge +; If we were to optimize defs, we would end up with 2= MemoryDef(liveOnEntry) and 4 = MemoryDef(liveOnEntry) +; Both would mean both 1,2, and 3,4 would reach the phi node. Because the phi node can only have one entry +; on each edge, it would choose 2, 4 and disconnect 1 and 3 completely from the SSA graph, even though they +; are not dead +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +define void @sink_store(i32 %index, i32* noalias nocapture %foo, i32* noalias nocapture %bar) { +entry: + %cmp = trunc i32 %index to i1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry +; CHECK: 1 = MemoryDef(liveOnEntry) +; 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 %index, i32* %foo, align 4, !alias.scope !0 + store i32 %index, i32* %foo, align 4, !alias.scope !0 +; CHECK: 2 = MemoryDef(1) +; 2 = MemoryDef(1) +; CHECK-NEXT: store i32 %index, i32* %bar, align 4, !noalias !0 + store i32 %index, i32* %bar, align 4, !noalias !0 + br label %if.end + +if.else: ; preds = %entry +; CHECK: 3 = MemoryDef(liveOnEntry) +; 3 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 %index, i32* %foo, align 4, !alias.scope !0 + store i32 %index, i32* %foo, align 4, !alias.scope !0 +; CHECK: 4 = MemoryDef(3) +; 4 = MemoryDef(3) +; CHECK-NEXT: store i32 %index, i32* %bar, align 4, !noalias !0 + store i32 %index, i32* %bar, align 4, !noalias !0 + br label %if.end + +if.end: ; preds = %if.else, %if.then +; CHECK: 5 = MemoryPhi({if.then,2},{if.else,4}) +; 5 = 
MemoryPhi({if.then,2},{if.else,4}) +; CHECK: MemoryUse(5) +; MemoryUse(5) +; CHECK-NEXT: %c = load i32, i32* %foo, !alias.scope !0 + %c = load i32, i32* %foo, !alias.scope !0 +; CHECK: MemoryUse(5) +; MemoryUse(5) +; CHECK-NEXT: %d = load i32, i32* %bar, !noalias !0 + %d = load i32, i32* %bar, !noalias !0 + ret void +} +!0 = !{!0} Index: test/Transforms/Util/memoryssa-optimize-use.ll =================================================================== --- /dev/null +++ test/Transforms/Util/memoryssa-optimize-use.ll @@ -0,0 +1,54 @@ +; RUN: opt -basicaa -print-memoryssa -analyze -verify-memoryssa < %s 2>&1| FileCheck %s + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +; Function Attrs: ssp uwtable +define i32 @main() #0 { +entry: +; CHECK: 1 = MemoryDef(liveOnEntry) +; 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: %call = call noalias i8* @_Znwm(i64 4) #2 + %call = call noalias i8* @_Znwm(i64 4) #2 + %0 = bitcast i8* %call to i32* +; CHECK: 2 = MemoryDef(1) +; 2 = MemoryDef(1) +; CHECK-NEXT: %call1 = call noalias i8* @_Znwm(i64 4) #2 + %call1 = call noalias i8* @_Znwm(i64 4) #2 + %1 = bitcast i8* %call1 to i32* +; CHECK: 3 = MemoryDef(2) +; 3 = MemoryDef(2) +; CHECK-NEXT: store i32 5, i32* %0, align 4 + store i32 5, i32* %0, align 4 +; CHECK: 4 = MemoryDef(3) +; 4 = MemoryDef(3) +; CHECK-NEXT: store i32 7, i32* %1, align 4 + store i32 7, i32* %1, align 4 +; CHECK: MemoryUse(3) +; MemoryUse(3) +; CHECK-NEXT: %2 = load i32, i32* %0, align 4 + %2 = load i32, i32* %0, align 4 +; CHECK: MemoryUse(4) +; MemoryUse(4) +; CHECK-NEXT: %3 = load i32, i32* %1, align 4 + %3 = load i32, i32* %1, align 4 +; CHECK: MemoryUse(3) +; MemoryUse(3) +; CHECK-NEXT: %4 = load i32, i32* %0, align 4 + %4 = load i32, i32* %0, align 4 +; CHECK: MemoryUse(4) +; MemoryUse(4) +; CHECK-NEXT: %5 = load i32, i32* %1, align 4 + %5 = load i32, i32* %1, align 4 + %add = add nsw i32 %3, %5 + ret i32 %add +} +declare noalias i8* @_Znwm(i64) #1 + +attributes #0 = { ssp uwtable 
"less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nobuiltin "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { builtin } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"PIC Level", i32 2} +!1 = !{!"clang version 3.7.0 (http://llvm.org/git/clang.git c9903b44534b8b5debcdbe375ee5c1fec6cc7243) (llvm/trunk 228022)"}