Index: llvm/trunk/include/llvm/IR/Function.h =================================================================== --- llvm/trunk/include/llvm/IR/Function.h +++ llvm/trunk/include/llvm/IR/Function.h @@ -543,6 +543,12 @@ Constant *getPrologueData() const; void setPrologueData(Constant *PrologueData); + /// Print the function to an output stream with an optional + /// AssemblyAnnotationWriter. + void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW = nullptr, + bool ShouldPreserveUseListOrder = false, + bool IsForDebug = false) const; + /// viewCFG - This function is meant for use from the debugger. You can just /// say 'call F->viewCFG()' and a ghostview window should pop up from the /// program, displaying the CFG of the current function with the code for each Index: llvm/trunk/include/llvm/IR/Value.def =================================================================== --- llvm/trunk/include/llvm/IR/Value.def +++ llvm/trunk/include/llvm/IR/Value.def @@ -54,6 +54,9 @@ HANDLE_VALUE(Argument) HANDLE_VALUE(BasicBlock) +HANDLE_VALUE(MemoryUse) +HANDLE_VALUE(MemoryDef) +HANDLE_VALUE(MemoryPhi) HANDLE_GLOBAL_VALUE(Function) HANDLE_GLOBAL_VALUE(GlobalAlias) Index: llvm/trunk/include/llvm/InitializePasses.h =================================================================== --- llvm/trunk/include/llvm/InitializePasses.h +++ llvm/trunk/include/llvm/InitializePasses.h @@ -205,6 +205,8 @@ void initializeMemDepPrinterPass(PassRegistry&); void initializeMemDerefPrinterPass(PassRegistry&); void initializeMemoryDependenceAnalysisPass(PassRegistry&); +void initializeMemorySSALazyPass(PassRegistry&); +void initializeMemorySSAPrinterPassPass(PassRegistry&); void initializeMergedLoadStoreMotionPass(PassRegistry &); void initializeMetaRenamerPass(PassRegistry&); void initializeMergeFunctionsPass(PassRegistry&); Index: llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h =================================================================== --- llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h +++ llvm/trunk/include/llvm/Transforms/Utils/MemorySSA.h @@ -0,0 +1,892 @@ +//===- MemorySSA.h - Build Memory SSA ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// \file +// \brief This file exposes an interface to building/using memory SSA to +// walk memory instructions using a use/def graph. +// +// Memory SSA class builds an SSA form that links together memory access +// instructions such loads, stores, atomics, and calls. Additionally, it does a +// trivial form of "heap versioning" Every time the memory state changes in the +// program, we generate a new heap version. It generates MemoryDef/Uses/Phis +// that are overlayed on top of the existing instructions. +// +// As a trivial example, +// define i32 @main() #0 { +// entry: +// %call = call noalias i8* @_Znwm(i64 4) #2 +// %0 = bitcast i8* %call to i32* +// %call1 = call noalias i8* @_Znwm(i64 4) #2 +// %1 = bitcast i8* %call1 to i32* +// store i32 5, i32* %0, align 4 +// store i32 7, i32* %1, align 4 +// %2 = load i32* %0, align 4 +// %3 = load i32* %1, align 4 +// %add = add nsw i32 %2, %3 +// ret i32 %add +// } +// +// Will become +// define i32 @main() #0 { +// entry: +// ; 1 = MemoryDef(0) +// %call = call noalias i8* @_Znwm(i64 4) #3 +// %2 = bitcast i8* %call to i32* +// ; 2 = MemoryDef(1) +// %call1 = call noalias i8* @_Znwm(i64 4) #3 +// %4 = bitcast i8* %call1 to i32* +// ; 3 = MemoryDef(2) +// store i32 5, i32* %2, align 4 +// ; 4 = MemoryDef(3) +// store i32 7, i32* %4, align 4 +// ; MemoryUse(4) +// %7 = load i32* %2, align 4 +// ; MemoryUse(3) +// %8 = load i32* %4, align 4 +// %add = add nsw i32 %7, %8 +// ret i32 %add +// } +// +// Given this form, all the stores that could ever effect the load at %8 can be +// gotten by using the MemoryUse associated with it, and walking from use to def +// until you hit the top of the function. +// +// Each def also has a list of users associated with it, so you can walk from +// both def to users, and users to defs. Note that we disambiguate MemoryUses, +// but not the RHS of MemoryDefs. You can see this above at %8, which would +// otherwise be a MemoryUse(4). Being disambiguated means that for a given +// store, all the MemoryUses on its use lists are may-aliases of that store (but +// the MemoryDefs on its use list may not be). +// +// MemoryDefs are not disambiguated because it would require multiple reaching +// definitions, which would require multiple phis, and multiple memoryaccesses +// per instruction. +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_MEMORYSSA_H +#define LLVM_TRANSFORMS_UTILS_MEMORYSSA_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/ilist_node.h" +#include "llvm/ADT/iterator.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/User.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" + +namespace llvm { +class BasicBlock; +class DominatorTree; +class Function; +class MemoryAccess; +template class memoryaccess_def_iterator_base; +using memoryaccess_def_iterator = memoryaccess_def_iterator_base; +using const_memoryaccess_def_iterator = + memoryaccess_def_iterator_base; + +// \brief The base for all memory accesses. All memory accesses in a block are +// linked together using an intrusive list. +class MemoryAccess : public User, public ilist_node { + void *operator new(size_t, unsigned) = delete; + void *operator new(size_t) = delete; + +public: + // Methods for support type inquiry through isa, cast, and + // dyn_cast + static inline bool classof(const MemoryAccess *) { return true; } + static inline bool classof(const Value *V) { + unsigned ID = V->getValueID(); + return ID == MemoryUseVal || ID == MemoryPhiVal || ID == MemoryDefVal; + } + + virtual ~MemoryAccess(); + BasicBlock *getBlock() const { return Block; } + + virtual void print(raw_ostream &OS) const = 0; + virtual void dump() const; + + /// \brief The user iterators for a memory access + typedef user_iterator iterator; + typedef const_user_iterator const_iterator; + + /// \brief This iterator walks over all of the defs in a given + /// MemoryAccess. For MemoryPhi nodes, this walks arguments. For + /// MemoryUse/MemoryDef, this walks the defining access. + memoryaccess_def_iterator defs_begin(); + const_memoryaccess_def_iterator defs_begin() const; + memoryaccess_def_iterator defs_end(); + const_memoryaccess_def_iterator defs_end() const; + +protected: + friend class MemorySSA; + friend class MemoryUseOrDef; + friend class MemoryUse; + friend class MemoryDef; + friend class MemoryPhi; + + /// \brief Used internally to give IDs to MemoryAccesses for printing + virtual unsigned getID() const = 0; + + MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB, + unsigned NumOperands) + : User(Type::getVoidTy(C), Vty, nullptr, NumOperands), Block(BB) {} + +private: + MemoryAccess(const MemoryAccess &); + void operator=(const MemoryAccess &); + BasicBlock *Block; +}; + +template <> +struct ilist_traits : public ilist_default_traits { + /// See details of the instruction class for why this trick works + /// FIXME: The downcast is UB. + MemoryAccess *createSentinel() const { + return static_cast(&Sentinel); + } + + static void destroySentinel(MemoryAccess *) {} + + MemoryAccess *provideInitialHead() const { return createSentinel(); } + MemoryAccess *ensureHead(MemoryAccess *) const { return createSentinel(); } + static void noteHead(MemoryAccess *, MemoryAccess *) {} + +private: + mutable ilist_half_node Sentinel; +}; + +inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) { + MA.print(OS); + return OS; +} + +/// \brief Class that has the common methods + fields of memory uses/defs. It's +/// a little awkward to have, but there are many cases where we want either a +/// use or def, and there are many cases where uses are needed (defs aren't +/// acceptable), and vice-versa. +/// +/// This class should never be instantiated directly; make a MemoryUse or +/// MemoryDef instead. +class MemoryUseOrDef : public MemoryAccess { + void *operator new(size_t, unsigned) = delete; + void *operator new(size_t) = delete; + +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + /// \brief Get the instruction that this MemoryUse represents. + Instruction *getMemoryInst() const { return MemoryInst; } + + /// \brief Get the access that produces the memory state used by this Use. + MemoryAccess *getDefiningAccess() const { return getOperand(0); } + + static inline bool classof(const MemoryUseOrDef *) { return true; } + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; + } + +protected: + friend class MemorySSA; + + MemoryUseOrDef(LLVMContext &C, MemoryAccess *DMA, unsigned Vty, + Instruction *MI, BasicBlock *BB) + : MemoryAccess(C, Vty, BB, 1), MemoryInst(MI) { + setDefiningAccess(DMA); + } + + void setDefiningAccess(MemoryAccess *DMA) { setOperand(0, DMA); } + +private: + Instruction *MemoryInst; +}; +template <> +struct OperandTraits + : public FixedNumOperandTraits {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUseOrDef, MemoryAccess) + +/// \brief Represents read-only accesses to memory +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryUse's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Ref". +class MemoryUse final : public MemoryUseOrDef { + void *operator new(size_t, unsigned) = delete; + +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + // allocate space for exactly one operand + void *operator new(size_t s) { return User::operator new(s, 1); } + + MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB) + : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB) {} + + static inline bool classof(const MemoryUse *) { return true; } + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryUseVal; + } + + void print(raw_ostream &OS) const override; + +protected: + friend class MemorySSA; + + unsigned getID() const override { + llvm_unreachable("MemoryUses do not have IDs"); + } +}; +template <> +struct OperandTraits : public FixedNumOperandTraits {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryUse, MemoryAccess) + +/// \brief Represents a read-write access to memory, whether it is a must-alias, +/// or a may-alias. +/// +/// In particular, the set of Instructions that will be represented by +/// MemoryDef's is exactly the set of Instructions for which +/// AliasAnalysis::getModRefInfo returns "Mod" or "ModRef". +/// Note that, in order to provide def-def chains, all defs also have a use +/// associated with them. This use points to the nearest reaching +/// MemoryDef/MemoryPhi. +class MemoryDef final : public MemoryUseOrDef { + void *operator new(size_t, unsigned) = delete; + +public: + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + // allocate space for exactly one operand + void *operator new(size_t s) { return User::operator new(s, 1); } + + MemoryDef(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB, + unsigned Ver) + : MemoryUseOrDef(C, DMA, MemoryDefVal, MI, BB), ID(Ver) {} + + static inline bool classof(const MemoryDef *) { return true; } + static inline bool classof(const Value *MA) { + return MA->getValueID() == MemoryDefVal; + } + + void print(raw_ostream &OS) const override; + +protected: + friend class MemorySSA; + + // For debugging only. This gets used to give memory accesses pretty numbers + // when printing them out + unsigned getID() const override { return ID; } + +private: + const unsigned ID; +}; +template <> +struct OperandTraits : public FixedNumOperandTraits {}; +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryDef, MemoryAccess) + +/// \brief Represents phi nodes for memory accesses. +/// +/// These have the same semantic as regular phi nodes, with the exception that +/// only one phi will ever exist in a given basic block. +/// Guaranteeing one phi per block means guaranteeing there is only ever one +/// valid reaching MemoryDef/MemoryPHI along each path to the phi node. +/// This is ensured by not allowing disambiguation of the RHS of a MemoryDef or +/// a MemoryPhi's operands. +/// That is, given +/// if (a) { +/// store %a +/// store %b +/// } +/// it *must* be transformed into +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(1) +/// store %b +/// } +/// and *not* +/// if (a) { +/// 1 = MemoryDef(liveOnEntry) +/// store %a +/// 2 = MemoryDef(liveOnEntry) +/// store %b +/// } +/// even if the two stores do not conflict. Otherwise, both 1 and 2 reach the +/// end of the branch, and if there are not two phi nodes, one will be +/// disconnected completely from the SSA graph below that point. +/// Because MemoryUse's do not generate new definitions, they do not have this +/// issue. +class MemoryPhi final : public MemoryAccess { + void *operator new(size_t, unsigned) = delete; + // allocate space for exactly zero operands + void *operator new(size_t s) { return User::operator new(s); } + +public: + /// Provide fast operand accessors + DECLARE_TRANSPARENT_OPERAND_ACCESSORS(MemoryAccess); + + MemoryPhi(LLVMContext &C, BasicBlock *BB, unsigned Ver, unsigned NumPreds = 0) + : MemoryAccess(C, MemoryPhiVal, BB, 0), ID(Ver), ReservedSpace(NumPreds) { + allocHungoffUses(ReservedSpace); + } + + // Block iterator interface. This provides access to the list of incoming + // basic blocks, which parallels the list of incoming values. + typedef BasicBlock **block_iterator; + typedef BasicBlock *const *const_block_iterator; + + block_iterator block_begin() { + auto *Ref = reinterpret_cast(op_begin() + ReservedSpace); + return reinterpret_cast(Ref + 1); + } + + const_block_iterator block_begin() const { + const auto *Ref = + reinterpret_cast(op_begin() + ReservedSpace); + return reinterpret_cast(Ref + 1); + } + + block_iterator block_end() { return block_begin() + getNumOperands(); } + + const_block_iterator block_end() const { + return block_begin() + getNumOperands(); + } + + op_range incoming_values() { return operands(); } + + const_op_range incoming_values() const { return operands(); } + + /// \brief Return the number of incoming edges + unsigned getNumIncomingValues() const { return getNumOperands(); } + + /// \brief Return incoming value number x + MemoryAccess *getIncomingValue(unsigned I) const { return getOperand(I); } + void setIncomingValue(unsigned I, MemoryAccess *V) { + assert(V && "PHI node got a null value!"); + assert(getType() == V->getType() && + "All operands to PHI node must be the same type as the PHI node!"); + setOperand(I, V); + } + static unsigned getOperandNumForIncomingValue(unsigned I) { return I; } + static unsigned getIncomingValueNumForOperand(unsigned I) { return I; } + + /// \brief Return incoming basic block number @p i. + BasicBlock *getIncomingBlock(unsigned I) const { return block_begin()[I]; } + + /// \brief Return incoming basic block corresponding + /// to an operand of the PHI. + BasicBlock *getIncomingBlock(const Use &U) const { + assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?"); + return getIncomingBlock(unsigned(&U - op_begin())); + } + + /// \brief Return incoming basic block corresponding + /// to value use iterator. + BasicBlock *getIncomingBlock(MemoryAccess::const_user_iterator I) const { + return getIncomingBlock(I.getUse()); + } + + void setIncomingBlock(unsigned I, BasicBlock *BB) { + assert(BB && "PHI node got a null basic block!"); + block_begin()[I] = BB; + } + + /// \brief Add an incoming value to the end of the PHI list + void addIncoming(MemoryAccess *V, BasicBlock *BB) { + if (getNumOperands() == ReservedSpace) + growOperands(); // Get more space! + // Initialize some new operands. + setNumHungOffUseOperands(getNumOperands() + 1); + setIncomingValue(getNumOperands() - 1, V); + setIncomingBlock(getNumOperands() - 1, BB); + } + + /// \brief Return the first index of the specified basic + /// block in the value list for this PHI. Returns -1 if no instance. + int getBasicBlockIndex(const BasicBlock *BB) const { + for (unsigned I = 0, E = getNumOperands(); I != E; ++I) + if (block_begin()[I] == BB) + return I; + return -1; + } + + Value *getIncomingValueForBlock(const BasicBlock *BB) const { + int Idx = getBasicBlockIndex(BB); + assert(Idx >= 0 && "Invalid basic block argument!"); + return getIncomingValue(Idx); + } + + static inline bool classof(const MemoryPhi *) { return true; } + static inline bool classof(const Value *V) { + return V->getValueID() == MemoryPhiVal; + } + + void print(raw_ostream &OS) const override; + +protected: + friend class MemorySSA; + /// \brief this is more complicated than the generic + /// User::allocHungoffUses, because we have to allocate Uses for the incoming + /// values and pointers to the incoming blocks, all in one allocation. + void allocHungoffUses(unsigned N) { + User::allocHungoffUses(N, /* IsPhi */ true); + } + + /// For debugging only. This gets used to give memory accesses pretty numbers + /// when printing them out + virtual unsigned getID() const final { return ID; } + +private: + // For debugging only + const unsigned ID; + unsigned ReservedSpace; + + /// \brief This grows the operand list in response to a push_back style of + /// operation. This grows the number of ops by 1.5 times. + void growOperands() { + unsigned E = getNumOperands(); + // 2 op PHI nodes are VERY common, so reserve at least enough for that. + ReservedSpace = std::max(E + E / 2, 2u); + growHungoffUses(ReservedSpace, /* IsPhi */ true); + } +}; +template <> struct OperandTraits : public HungoffOperandTraits<2> {}; + +DEFINE_TRANSPARENT_OPERAND_ACCESSORS(MemoryPhi, MemoryAccess) + +class MemorySSAWalker; + +/// \brief Encapsulates MemorySSA, including all data associated with memory +/// accesses. +class MemorySSA { +public: + MemorySSA(Function &); + ~MemorySSA(); + + /// \brief Build Memory SSA, and return the walker we used during building, + /// for later reuse. If MemorySSA is already built, just return the walker. + MemorySSAWalker *buildMemorySSA(AliasAnalysis *, DominatorTree *); + + /// \brief Returns false if you need to call buildMemorySSA. + bool isFinishedBuilding() const { return Walker; } + + /// \brief Given a memory Mod/Ref'ing instruction, get the MemorySSA + /// access associated with it. If passed a basic block gets the memory phi + /// node that exists for that block, if there is one. Otherwise, this will get + /// a MemoryUseOrDef. + MemoryAccess *getMemoryAccess(const Value *) const; + MemoryPhi *getMemoryAccess(const BasicBlock *BB) const; + + void dump() const; + void print(raw_ostream &) const; + + /// \brief Return true if \p MA represents the live on entry value + /// + /// Loads and stores from pointer arguments and other global values may be + /// defined by memory operations that do not occur in the current function, so + /// they may be live on entry to the function. MemorySSA represents such + /// memory state by the live on entry definition, which is guaranteed to occur + /// before any other memory access in the function. + inline bool isLiveOnEntryDef(const MemoryAccess *MA) const { + return MA == LiveOnEntryDef.get(); + } + + inline MemoryAccess *getLiveOnEntryDef() const { + return LiveOnEntryDef.get(); + } + + using AccessListType = iplist; + + /// \brief Return the list of MemoryAccess's for a given basic block. + /// + /// This list is not modifiable by the user. + const AccessListType *getBlockAccesses(const BasicBlock *BB) const { + auto It = PerBlockAccesses.find(BB); + return It == PerBlockAccesses.end() ? nullptr : It->second.get(); + } + + enum InsertionPlace { Beginning, End }; + + /// \brief Given two memory accesses in the same basic block, determine + /// whether MemoryAccess \p A dominates MemoryAccess \p B. + bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const; + +protected: + // Used by Memory SSA annotater, dumpers, and wrapper pass + friend class MemorySSAAnnotatedWriter; + friend class MemorySSAPrinterPass; + void verifyDefUses(Function &F); + void verifyDomination(Function &F); + +private: + void verifyUseInDefs(MemoryAccess *, MemoryAccess *); + using AccessMap = + DenseMap>; + + void + determineInsertionPoint(const SmallPtrSetImpl &DefiningBlocks); + void computeDomLevels(DenseMap &DomLevels); + void markUnreachableAsLiveOnEntry(BasicBlock *BB); + bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const; + MemoryAccess *createNewAccess(Instruction *, bool ignoreNonMemory = false); + MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace); + + MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *); + void renamePass(DomTreeNode *, MemoryAccess *IncomingVal, + SmallPtrSet &Visited); + AccessListType *getOrCreateAccessList(BasicBlock *); + AliasAnalysis *AA; + DominatorTree *DT; + Function &F; + + // Memory SSA mappings + DenseMap InstructionToMemoryAccess; + AccessMap PerBlockAccesses; + std::unique_ptr LiveOnEntryDef; + + // Memory SSA building info + MemorySSAWalker *Walker; + unsigned NextID; +}; + +// This pass does eager building and then printing of MemorySSA. It is used by +// the tests to be able to build, dump, and verify Memory SSA. +class MemorySSAPrinterPass : public FunctionPass { +public: + MemorySSAPrinterPass(); + + static char ID; + bool doInitialization(Module &M) override; + bool runOnFunction(Function &) override; + void releaseMemory() override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *M) const override; + static void registerOptions(); + MemorySSA &getMSSA() { return *MSSA; } + +private: + bool VerifyMemorySSA; + + std::unique_ptr MSSA; + // FIXME(gbiv): It seems that MemorySSA doesn't own the walker it returns? + std::unique_ptr Walker; + Function *F; +}; + +class MemorySSALazy : public FunctionPass { +public: + MemorySSALazy(); + + static char ID; + bool runOnFunction(Function &) override; + void releaseMemory() override; + MemorySSA &getMSSA() { + assert(MSSA); + return *MSSA; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + } + +private: + std::unique_ptr MSSA; +}; + +/// \brief This is the generic walker interface for walkers of MemorySSA. +/// Walkers are used to be able to further disambiguate the def-use chains +/// MemorySSA gives you, or otherwise produce better info than MemorySSA gives +/// you. +/// In particular, while the def-use chains provide basic information, and are +/// guaranteed to give, for example, the nearest may-aliasing MemoryDef for a +/// MemoryUse as AliasAnalysis considers it, a user mant want better or other +/// information. In particular, they may want to use SCEV info to further +/// disambiguate memory accesses, or they may want the nearest dominating +/// may-aliasing MemoryDef for a call or a store. This API enables a +/// standardized interface to getting and using that info. +class MemorySSAWalker { +public: + MemorySSAWalker(MemorySSA *); + virtual ~MemorySSAWalker() {} + + using MemoryAccessSet = SmallVector; + + /// \brief Given a memory Mod/Ref/ModRef'ing instruction, calling this + /// will give you the nearest dominating MemoryAccess that Mod's the location + /// the instruction accesses (by skipping any def which AA can prove does not + /// alias the location(s) accessed by the instruction given). + /// + /// Note that this will return a single access, and it must dominate the + /// Instruction, so if an operand of a MemoryPhi node Mod's the instruction, + /// this will return the MemoryPhi, not the operand. This means that + /// given: + /// if (a) { + /// 1 = MemoryDef(liveOnEntry) + /// store %a + /// } else { + /// 2 = MemoryDef(liveOnEntry) + /// store %b + /// } + /// 3 = MemoryPhi(2, 1) + /// MemoryUse(3) + /// load %a + /// + /// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef + /// in the if (a) branch. + virtual MemoryAccess *getClobberingMemoryAccess(const Instruction *) = 0; + + /// \brief Given a potentially clobbering memory access and a new location, + /// calling this will give you the nearest dominating clobbering MemoryAccess + /// (by skipping non-aliasing def links). + /// + /// This version of the function is mainly used to disambiguate phi translated + /// pointers, where the value of a pointer may have changed from the initial + /// memory access. Note that this expects to be handed either a MemoryUse, + /// or an already potentially clobbering access. Unlike the above API, if + /// given a MemoryDef that clobbers the pointer as the starting access, it + /// will return that MemoryDef, whereas the above would return the clobber + /// starting from the use side of the memory def. + virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + MemoryLocation &) = 0; + +protected: + MemorySSA *MSSA; +}; + +/// \brief A MemorySSAWalker that does no alias queries, or anything else. It +/// simply returns the links as they were constructed by the builder. +class DoNothingMemorySSAWalker final : public MemorySSAWalker { +public: + MemoryAccess *getClobberingMemoryAccess(const Instruction *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + MemoryLocation &) override; +}; + +using MemoryAccessPair = std::pair; +using ConstMemoryAccessPair = std::pair; + +/// \brief A MemorySSAWalker that does AA walks and caching of lookups to +/// disambiguate accesses. +class CachingMemorySSAWalker final : public MemorySSAWalker { +public: + CachingMemorySSAWalker(MemorySSA *, AliasAnalysis *, DominatorTree *); + virtual ~CachingMemorySSAWalker(); + MemoryAccess *getClobberingMemoryAccess(const Instruction *) override; + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, + MemoryLocation &) override; + +protected: + struct UpwardsMemoryQuery; + MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &, + const MemoryLocation &); + + void doCacheInsert(const MemoryAccess *, MemoryAccess *, + const UpwardsMemoryQuery &, const MemoryLocation &); + + void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &, + const MemoryLocation &); + +private: + MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &, + UpwardsMemoryQuery &, bool); + MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &); + bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &, + const MemoryLocation &Loc) const; + SmallDenseMap + CachedUpwardsClobberingAccess; + DenseMap CachedUpwardsClobberingCall; + AliasAnalysis *AA; + DominatorTree *DT; +}; + +/// \brief Iterator base class used to implement const and non-const iterators +/// over the defining accesses of a MemoryAccess. +template +class memoryaccess_def_iterator_base + : public iterator_facade_base, + std::forward_iterator_tag, T, ptrdiff_t, T *, + T *> { + using BaseT = typename memoryaccess_def_iterator_base::iterator_facade_base; + +public: + memoryaccess_def_iterator_base(T *Start) : Access(Start), ArgNo(0) {} + memoryaccess_def_iterator_base() : Access(nullptr), ArgNo(0) {} + bool operator==(const memoryaccess_def_iterator_base &Other) const { + return Access == Other.Access && (!Access || ArgNo == Other.ArgNo); + } + + // This is a bit ugly, but for MemoryPHI's, unlike PHINodes, you can't get the + // block from the operand in constant time (In a PHINode, the uselist has + // both, so it's just subtraction). We provide it as part of the + // iterator to avoid callers having to linear walk to get the block. + // If the operation becomes constant time on MemoryPHI's, this bit of + // abstraction breaking should be removed. + BasicBlock *getPhiArgBlock() const { + MemoryPhi *MP = dyn_cast(Access); + assert(MP && "Tried to get phi arg block when not iterating over a PHI"); + return MP->getIncomingBlock(ArgNo); + } + typename BaseT::iterator::pointer operator*() const { + assert(Access && "Tried to access past the end of our iterator"); + // Go to the first argument for phis, and the defining access for everything + // else. + if (MemoryPhi *MP = dyn_cast(Access)) + return MP->getIncomingValue(ArgNo); + return cast(Access)->getDefiningAccess(); + } + using BaseT::operator++; + memoryaccess_def_iterator &operator++() { + assert(Access && "Hit end of iterator"); + if (MemoryPhi *MP = dyn_cast(Access)) { + if (++ArgNo >= MP->getNumIncomingValues()) { + ArgNo = 0; + Access = nullptr; + } + } else { + Access = nullptr; + } + return *this; + } + +private: + T *Access; + unsigned ArgNo; +}; + +inline memoryaccess_def_iterator MemoryAccess::defs_begin() { + return memoryaccess_def_iterator(this); +} +inline const_memoryaccess_def_iterator MemoryAccess::defs_begin() const { + return const_memoryaccess_def_iterator(this); +} + +inline memoryaccess_def_iterator MemoryAccess::defs_end() { + return memoryaccess_def_iterator(); +} + +inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const { + return const_memoryaccess_def_iterator(); +} + +/// \brief GraphTraits for a MemoryAccess, which walks defs in the normal case, +/// and uses in the inverse case. +template <> struct GraphTraits { + using NodeType = MemoryAccess; + using ChildIteratorType = memoryaccess_def_iterator; + + static NodeType *getEntryNode(NodeType *N) { return N; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->defs_begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->defs_end(); + } +}; + +template <> struct GraphTraits> { + using NodeType = MemoryAccess; + using ChildIteratorType = MemoryAccess::iterator; + + static NodeType *getEntryNode(NodeType *N) { return N; } + static inline ChildIteratorType child_begin(NodeType *N) { + return N->user_begin(); + } + static inline ChildIteratorType child_end(NodeType *N) { + return N->user_end(); + } +}; + +/// \brief Provide an iterator that walks defs, giving both the memory access, +/// and the current pointer location, updating the pointer location as it +/// changes due to phi node translation. +/// +/// This iterator, while somewhat specialized, is what most clients actually +/// want when walking upwards through MemorySSA def chains. It takes a pair of +/// , and walks defs, properly translating the +/// memory location through phi nodes for the user. +class upward_defs_iterator + : public iterator_facade_base { + using BaseT = upward_defs_iterator::iterator_facade_base; + +public: + upward_defs_iterator(const MemoryAccessPair &Info) + : DefIterator(Info.first), Location(Info.second), + OriginalAccess(Info.first) { + CurrentPair.first = nullptr; + + WalkingPhi = Info.first && isa(Info.first); + fillInCurrentPair(); + } + + upward_defs_iterator() + : DefIterator(), Location(), OriginalAccess(), WalkingPhi(false) { + CurrentPair.first = nullptr; + } + + bool operator==(const upward_defs_iterator &Other) const { + return DefIterator == Other.DefIterator; + } + + typename BaseT::iterator::reference operator*() const { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of our iterator"); + return CurrentPair; + } + + using BaseT::operator++; + upward_defs_iterator &operator++() { + assert(DefIterator != OriginalAccess->defs_end() && + "Tried to access past the end of the iterator"); + ++DefIterator; + if (DefIterator != OriginalAccess->defs_end()) + fillInCurrentPair(); + return *this; + } + + BasicBlock *getPhiArgBlock() const { return DefIterator.getPhiArgBlock(); } + +private: + void fillInCurrentPair() { + CurrentPair.first = *DefIterator; + if (WalkingPhi && Location.Ptr) { + PHITransAddr Translator( + const_cast(Location.Ptr), + OriginalAccess->getBlock()->getModule()->getDataLayout(), nullptr); + if (!Translator.PHITranslateValue(OriginalAccess->getBlock(), + DefIterator.getPhiArgBlock(), nullptr, + false)) + if (Translator.getAddr() != Location.Ptr) { + CurrentPair.second = Location.getWithNewPtr(Translator.getAddr()); + return; + } + } + CurrentPair.second = Location; + } + + MemoryAccessPair CurrentPair; + memoryaccess_def_iterator DefIterator; + MemoryLocation Location; + MemoryAccess *OriginalAccess; + bool WalkingPhi; +}; + +inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) { + return upward_defs_iterator(Pair); +} +inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); } +} + +#endif Index: llvm/trunk/lib/IR/AsmWriter.cpp =================================================================== --- llvm/trunk/lib/IR/AsmWriter.cpp +++ llvm/trunk/lib/IR/AsmWriter.cpp @@ -3215,6 +3215,17 @@ // External Interface declarations //===----------------------------------------------------------------------===// +void Function::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, + bool ShouldPreserveUseListOrder, + bool IsForDebug) const { + SlotTracker SlotTable(this->getParent()); + formatted_raw_ostream OS(ROS); + AssemblyWriter W(OS, SlotTable, this->getParent(), AAW, + IsForDebug, + ShouldPreserveUseListOrder); + W.printFunction(this); +} + void Module::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW, bool ShouldPreserveUseListOrder, bool IsForDebug) const { SlotTracker SlotTable(this); Index: llvm/trunk/lib/Transforms/Utils/CMakeLists.txt =================================================================== --- llvm/trunk/lib/Transforms/Utils/CMakeLists.txt +++ llvm/trunk/lib/Transforms/Utils/CMakeLists.txt @@ -26,6 +26,7 @@ LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + MemorySSA.cpp MetaRenamer.cpp ModuleUtils.cpp PromoteMemoryToRegister.cpp Index: llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp +++ llvm/trunk/lib/Transforms/Utils/MemorySSA.cpp @@ -0,0 +1,939 @@ +//===-- MemorySSA.cpp - Memory SSA Builder---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------===// +// +// This file implements the MemorySSA class. +// +//===----------------------------------------------------------------===// +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Analysis/MemoryLocation.h" +#include "llvm/Analysis/PHITransAddr.h" +#include "llvm/IR/AssemblyAnnotationWriter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/MemorySSA.h" +#include + +#define DEBUG_TYPE "memoryssa" +using namespace llvm; +STATISTIC(NumClobberCacheLookups, "Number of Memory SSA version cache lookups"); +STATISTIC(NumClobberCacheHits, "Number of Memory SSA version cache hits"); +STATISTIC(NumClobberCacheInserts, "Number of MemorySSA version cache inserts"); +INITIALIZE_PASS_WITH_OPTIONS_BEGIN(MemorySSAPrinterPass, "print-memoryssa", + "Memory SSA", true, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_END(MemorySSAPrinterPass, "print-memoryssa", "Memory SSA", true, + true) +INITIALIZE_PASS(MemorySSALazy, "memoryssalazy", "Memory SSA", true, true) + +namespace llvm { + +/// \brief An assembly annotator class to print Memory SSA information in +/// comments. +class MemorySSAAnnotatedWriter : public AssemblyAnnotationWriter { + friend class MemorySSA; + const MemorySSA *MSSA; + +public: + MemorySSAAnnotatedWriter(const MemorySSA *M) : MSSA(M) {} + + virtual void emitBasicBlockStartAnnot(const BasicBlock *BB, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(BB)) + OS << "; " << *MA << "\n"; + } + + virtual void emitInstructionAnnot(const Instruction *I, + formatted_raw_ostream &OS) { + if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) + OS << "; " << *MA << "\n"; + } +}; +} + +namespace { +struct RenamePassData { + DomTreeNode *DTN; + DomTreeNode::const_iterator ChildIt; + MemoryAccess *IncomingVal; + + RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It, + MemoryAccess *M) + : DTN(D), ChildIt(It), IncomingVal(M) {} + void swap(RenamePassData &RHS) { + std::swap(DTN, RHS.DTN); + std::swap(ChildIt, RHS.ChildIt); + std::swap(IncomingVal, RHS.IncomingVal); + } +}; +} + +namespace llvm { +/// \brief Rename a single basic block into MemorySSA form. +/// Uses the standard SSA renaming algorithm. +/// \returns The new incoming value. +MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB, + MemoryAccess *IncomingVal) { + auto It = PerBlockAccesses.find(BB); + // Skip most processing if the list is empty. + if (It != PerBlockAccesses.end()) { + AccessListType *Accesses = It->second.get(); + for (MemoryAccess &L : *Accesses) { + switch (L.getValueID()) { + case Value::MemoryUseVal: + cast(&L)->setDefiningAccess(IncomingVal); + break; + case Value::MemoryDefVal: + // We can't legally optimize defs, because we only allow single + // memory phis/uses on operations, and if we optimize these, we can + // end up with multiple reaching defs. Uses do not have this + // problem, since they do not produce a value + cast(&L)->setDefiningAccess(IncomingVal); + IncomingVal = &L; + break; + case Value::MemoryPhiVal: + IncomingVal = &L; + break; + } + } + } + + // Pass through values to our successors + for (const BasicBlock *S : successors(BB)) { + auto It = PerBlockAccesses.find(S); + // Rename the phi nodes in our successor block + if (It == PerBlockAccesses.end() || !isa(It->second->front())) + continue; + AccessListType *Accesses = It->second.get(); + auto *Phi = cast(&Accesses->front()); + assert(std::find(succ_begin(BB), succ_end(BB), S) != succ_end(BB) && + "Must be at least one edge from Succ to BB!"); + Phi->addIncoming(IncomingVal, BB); + } + + return IncomingVal; +} + +/// \brief This is the standard SSA renaming algorithm. +/// +/// We walk the dominator tree in preorder, renaming accesses, and then filling +/// in phi nodes in our successors. +void MemorySSA::renamePass(DomTreeNode *Root, MemoryAccess *IncomingVal, + SmallPtrSet &Visited) { + SmallVector WorkStack; + IncomingVal = renameBlock(Root->getBlock(), IncomingVal); + WorkStack.push_back({Root, Root->begin(), IncomingVal}); + Visited.insert(Root->getBlock()); + + while (!WorkStack.empty()) { + DomTreeNode *Node = WorkStack.back().DTN; + DomTreeNode::const_iterator ChildIt = WorkStack.back().ChildIt; + IncomingVal = WorkStack.back().IncomingVal; + + if (ChildIt == Node->end()) { + WorkStack.pop_back(); + } else { + DomTreeNode *Child = *ChildIt; + ++WorkStack.back().ChildIt; + BasicBlock *BB = Child->getBlock(); + Visited.insert(BB); + IncomingVal = renameBlock(BB, IncomingVal); + WorkStack.push_back({Child, Child->begin(), IncomingVal}); + } + } +} + +/// \brief Compute dominator levels, used by the phi insertion algorithm above. +void MemorySSA::computeDomLevels(DenseMap &DomLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) + DomLevels[*DFI] = DFI.getPathLength() - 1; +} + +/// \brief This handles unreachable block acccesses by deleting phi nodes in +/// unreachable blocks, and marking all other unreachable MemoryAccess's as +/// being uses of the live on entry definition. +void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) { + assert(!DT->isReachableFromEntry(BB) && + "Reachable block found while handling unreachable blocks"); + + auto It = PerBlockAccesses.find(BB); + if (It == PerBlockAccesses.end()) + return; + + auto &Accesses = It->second; + for (auto AI = Accesses->begin(), AE = Accesses->end(); AI != AE;) { + auto Next = std::next(AI); + // If we have a phi, just remove it. We are going to replace all + // users with live on entry. + if (auto *UseOrDef = dyn_cast(AI)) + UseOrDef->setDefiningAccess(LiveOnEntryDef.get()); + else + Accesses->erase(AI); + AI = Next; + } +} + +MemorySSA::MemorySSA(Function &Func) + : AA(nullptr), DT(nullptr), F(Func), LiveOnEntryDef(nullptr), + Walker(nullptr), NextID(0) {} + +MemorySSA::~MemorySSA() { + // Drop all our references + for (const auto &Pair : PerBlockAccesses) + for (MemoryAccess &MA : *Pair.second) + MA.dropAllReferences(); +} + +MemorySSA::AccessListType *MemorySSA::getOrCreateAccessList(BasicBlock *BB) { + auto Res = PerBlockAccesses.insert(std::make_pair(BB, nullptr)); + + if (Res.second) + Res.first->second = make_unique(); + return Res.first->second.get(); +} + +MemorySSAWalker *MemorySSA::buildMemorySSA(AliasAnalysis *AA, + DominatorTree *DT) { + if (Walker) + return Walker; + + assert(!this->AA && !this->DT && + "MemorySSA without a walker already has AA or DT?"); + + auto *Result = new CachingMemorySSAWalker(this, AA, DT); + this->AA = AA; + this->DT = DT; + + // We create an access to represent "live on entry", for things like + // arguments or users of globals, where the memory they use is defined before + // the beginning of the function. We do not actually insert it into the IR. + // We do not define a live on exit for the immediate uses, and thus our + // semantics do *not* imply that something with no immediate uses can simply + // be removed. + BasicBlock &StartingPoint = F.getEntryBlock(); + LiveOnEntryDef = make_unique(F.getContext(), nullptr, nullptr, + &StartingPoint, NextID++); + + // We maintain lists of memory accesses per-block, trading memory for time. We + // could just look up the memory access for every possible instruction in the + // stream. + SmallPtrSet DefiningBlocks; + + // Go through each block, figure out where defs occur, and chain together all + // the accesses. + for (BasicBlock &B : F) { + AccessListType *Accesses = nullptr; + for (Instruction &I : B) { + MemoryAccess *MA = createNewAccess(&I, true); + if (!MA) + continue; + if (isa(MA)) + DefiningBlocks.insert(&B); + if (!Accesses) + Accesses = getOrCreateAccessList(&B); + Accesses->push_back(MA); + } + } + + // Determine where our MemoryPhi's should go + IDFCalculator IDFs(*DT); + IDFs.setDefiningBlocks(DefiningBlocks); + SmallVector IDFBlocks; + IDFs.calculate(IDFBlocks); + + // Now place MemoryPhi nodes. + for (auto &BB : IDFBlocks) { + // Insert phi node + AccessListType *Accesses = getOrCreateAccessList(BB); + MemoryPhi *Phi = new MemoryPhi(F.getContext(), BB, NextID++); + InstructionToMemoryAccess.insert(std::make_pair(BB, Phi)); + // Phi's always are placed at the front of the block. + Accesses->push_front(Phi); + } + + // Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get + // filled in with all blocks. + SmallPtrSet Visited; + renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited); + + // Now optimize the MemoryUse's defining access to point to the nearest + // dominating clobbering def. + // This ensures that MemoryUse's that are killed by the same store are + // immediate users of that store, one of the invariants we guarantee. + for (auto DomNode : depth_first(DT)) { + BasicBlock *BB = DomNode->getBlock(); + auto AI = PerBlockAccesses.find(BB); + if (AI == PerBlockAccesses.end()) + continue; + AccessListType *Accesses = AI->second.get(); + for (auto &MA : *Accesses) { + if (auto *MU = dyn_cast(&MA)) { + Instruction *Inst = MU->getMemoryInst(); + MU->setDefiningAccess(Result->getClobberingMemoryAccess(Inst)); + } + } + } + + // Mark the uses in unreachable blocks as live on entry, so that they go + // somewhere. + for (auto &BB : F) + if (!Visited.count(&BB)) + markUnreachableAsLiveOnEntry(&BB); + + Walker = Result; + return Walker; +} + +/// \brief Helper function to create new memory accesses +MemoryAccess *MemorySSA::createNewAccess(Instruction *I, bool IgnoreNonMemory) { + // Find out what affect this instruction has on memory. + ModRefInfo ModRef = AA->getModRefInfo(I); + bool Def = bool(ModRef & MRI_Mod); + bool Use = bool(ModRef & MRI_Ref); + + // It's possible for an instruction to not modify memory at all. During + // construction, we ignore them. + if (IgnoreNonMemory && !Def && !Use) + return nullptr; + + assert((Def || Use) && + "Trying to create a memory access with a non-memory instruction"); + + MemoryUseOrDef *MA; + if (Def) + MA = new MemoryDef(I->getModule()->getContext(), nullptr, I, I->getParent(), + NextID++); + else + MA = + new MemoryUse(I->getModule()->getContext(), nullptr, I, I->getParent()); + InstructionToMemoryAccess.insert(std::make_pair(I, MA)); + return MA; +} + +MemoryAccess *MemorySSA::findDominatingDef(BasicBlock *UseBlock, + enum InsertionPlace Where) { + // Handle the initial case + if (Where == Beginning) + // The only thing that could define us at the beginning is a phi node + if (MemoryPhi *Phi = getMemoryAccess(UseBlock)) + return Phi; + + DomTreeNode *CurrNode = DT->getNode(UseBlock); + // Need to be defined by our dominator + if (Where == Beginning) + CurrNode = CurrNode->getIDom(); + Where = End; + while (CurrNode) { + auto It = PerBlockAccesses.find(CurrNode->getBlock()); + if (It != PerBlockAccesses.end()) { + auto &Accesses = It->second; + for (auto RAI = Accesses->rbegin(), RAE = Accesses->rend(); RAI != RAE; + ++RAI) { + if (isa(*RAI) || isa(*RAI)) + return &*RAI; + } + } + CurrNode = CurrNode->getIDom(); + } + return LiveOnEntryDef.get(); +} + +/// \brief Returns true if \p Replacer dominates \p Replacee . +bool MemorySSA::dominatesUse(const MemoryAccess *Replacer, + const MemoryAccess *Replacee) const { + if (isa(Replacee)) + return DT->dominates(Replacer->getBlock(), Replacee->getBlock()); + const auto *MP = cast(Replacee); + // For a phi node, the use occurs in the predecessor block of the phi node. + // Since we may occur multiple times in the phi node, we have to check each + // operand to ensure Replacer dominates each operand where Replacee occurs. + for (const Use &Arg : MP->operands()) { + if (Arg != Replacee && + !DT->dominates(Replacer->getBlock(), MP->getIncomingBlock(Arg))) + return false; + } + return true; +} + +void MemorySSA::print(raw_ostream &OS) const { + MemorySSAAnnotatedWriter Writer(this); + F.print(OS, &Writer); +} + +void MemorySSA::dump() const { + MemorySSAAnnotatedWriter Writer(this); + F.print(dbgs(), &Writer); +} + +/// \brief Verify the domination properties of MemorySSA by checking that each +/// definition dominates all of its uses. +void MemorySSA::verifyDomination(Function &F) { + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *MP = getMemoryAccess(&B)) { + for (User *U : MP->users()) { + BasicBlock *UseBlock; + // Phi operands are used on edges, we simulate the right domination by + // acting as if the use occurred at the end of the predecessor block. + if (MemoryPhi *P = dyn_cast(U)) { + for (const auto &Arg : P->operands()) { + if (Arg == MP) { + UseBlock = P->getIncomingBlock(Arg); + break; + } + } + } else { + UseBlock = cast(U)->getBlock(); + } + assert(DT->dominates(MP->getBlock(), UseBlock) && + "Memory PHI does not dominate it's uses"); + } + } + + for (Instruction &I : B) { + MemoryAccess *MD = dyn_cast_or_null(getMemoryAccess(&I)); + if (!MD) + continue; + + for (const auto &U : MD->users()) { + BasicBlock *UseBlock; + // Things are allowed to flow to phi nodes over their predecessor edge. + if (auto *P = dyn_cast(U)) { + for (const auto &Arg : P->operands()) { + if (Arg == MD) { + UseBlock = P->getIncomingBlock(Arg); + break; + } + } + } else { + UseBlock = cast(U)->getBlock(); + } + assert(DT->dominates(MD->getBlock(), UseBlock) && + "Memory Def does not dominate it's uses"); + } + } + } +} + +/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use +/// appears in the use list of \p Def. +/// +/// llvm_unreachable is used instead of asserts because this may be called in +/// a build without asserts. In that case, we don't want this to turn into a +/// nop. +void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) { + // The live on entry use may cause us to get a NULL def here + if (!Def) { + if (!isLiveOnEntryDef(Use)) + llvm_unreachable("Null def but use not point to live on entry def"); + } else if (std::find(Def->user_begin(), Def->user_end(), Use) == + Def->user_end()) { + llvm_unreachable("Did not find use in def's use list"); + } +} + +/// \brief Verify the immediate use information, by walking all the memory +/// accesses and verifying that, for each use, it appears in the +/// appropriate def's use list +void MemorySSA::verifyDefUses(Function &F) { + for (BasicBlock &B : F) { + // Phi nodes are attached to basic blocks + if (MemoryPhi *Phi = getMemoryAccess(&B)) + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) + verifyUseInDefs(Phi->getIncomingValue(I), Phi); + + for (Instruction &I : B) { + if (MemoryAccess *MA = getMemoryAccess(&I)) { + assert(isa(MA) && + "Found a phi node not attached to a bb"); + verifyUseInDefs(cast(MA)->getDefiningAccess(), MA); + } + } + } +} + +MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const { + return InstructionToMemoryAccess.lookup(I); +} + +MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const { + return cast_or_null(getMemoryAccess((const Value *)BB)); +} + +/// \brief Determine, for two memory accesses in the same block, +/// whether \p Dominator dominates \p Dominatee. +/// \returns True if \p Dominator dominates \p Dominatee. +bool MemorySSA::locallyDominates(const MemoryAccess *Dominator, + const MemoryAccess *Dominatee) const { + + assert((Dominator->getBlock() == Dominatee->getBlock()) && + "Asking for local domination when accesses are in different blocks!"); + // Get the access list for the block + const AccessListType *AccessList = getBlockAccesses(Dominator->getBlock()); + AccessListType::const_reverse_iterator It(Dominator->getIterator()); + + // If we hit the beginning of the access list before we hit dominatee, we must + // dominate it + return std::none_of(It, AccessList->rend(), + [&](const MemoryAccess &MA) { return &MA == Dominatee; }); +} + +const static char LiveOnEntryStr[] = "liveOnEntry"; + +void MemoryDef::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + + OS << getID() << " = MemoryDef("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryPhi::print(raw_ostream &OS) const { + bool First = true; + OS << getID() << " = MemoryPhi("; + for (const auto &Op : operands()) { + BasicBlock *BB = getIncomingBlock(Op); + MemoryAccess *MA = cast(Op); + if (!First) + OS << ','; + else + First = false; + + OS << '{'; + if (BB->hasName()) + OS << BB->getName(); + else + BB->printAsOperand(OS, false); + OS << ','; + if (unsigned ID = MA->getID()) + OS << ID; + else + OS << LiveOnEntryStr; + OS << '}'; + } + OS << ')'; +} + +MemoryAccess::~MemoryAccess() {} + +void MemoryUse::print(raw_ostream &OS) const { + MemoryAccess *UO = getDefiningAccess(); + OS << "MemoryUse("; + if (UO && UO->getID()) + OS << UO->getID(); + else + OS << LiveOnEntryStr; + OS << ')'; +} + +void MemoryAccess::dump() const { + print(dbgs()); + dbgs() << "\n"; +} + +char MemorySSAPrinterPass::ID = 0; + +MemorySSAPrinterPass::MemorySSAPrinterPass() : FunctionPass(ID) { + initializeMemorySSAPrinterPassPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSAPrinterPass::releaseMemory() { + // Subtlety: Be sure to delete the walker before MSSA, because the walker's + // dtor may try to access MemorySSA. + Walker.reset(); + MSSA.reset(); +} + +void MemorySSAPrinterPass::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); +} + +bool MemorySSAPrinterPass::doInitialization(Module &M) { + VerifyMemorySSA = + M.getContext() + .template getOption(); + return false; +} + +void MemorySSAPrinterPass::registerOptions() { + OptionRegistry::registerOption( + "verify-memoryssa", "Run the Memory SSA verifier", false); +} + +void MemorySSAPrinterPass::print(raw_ostream &OS, const Module *M) const { + MSSA->print(OS); +} + +bool MemorySSAPrinterPass::runOnFunction(Function &F) { + this->F = &F; + MSSA.reset(new MemorySSA(F)); + AliasAnalysis *AA = &getAnalysis().getAAResults(); + DominatorTree *DT = &getAnalysis().getDomTree(); + Walker.reset(MSSA->buildMemorySSA(AA, DT)); + + if (VerifyMemorySSA) { + MSSA->verifyDefUses(F); + MSSA->verifyDomination(F); + } + + return false; +} + +char MemorySSALazy::ID = 0; + +MemorySSALazy::MemorySSALazy() : FunctionPass(ID) { + initializeMemorySSALazyPass(*PassRegistry::getPassRegistry()); +} + +void MemorySSALazy::releaseMemory() { MSSA.reset(); } + +bool MemorySSALazy::runOnFunction(Function &F) { + MSSA.reset(new MemorySSA(F)); + return false; +} + +MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {} + +CachingMemorySSAWalker::CachingMemorySSAWalker(MemorySSA *M, AliasAnalysis *A, + DominatorTree *D) + : MemorySSAWalker(M), AA(A), DT(D) {} + +CachingMemorySSAWalker::~CachingMemorySSAWalker() {} + +struct CachingMemorySSAWalker::UpwardsMemoryQuery { + // True if we saw a phi whose predecessor was a backedge + bool SawBackedgePhi; + // True if our original query started off as a call + bool IsCall; + // The pointer location we started the query with. This will be empty if + // IsCall is true. + MemoryLocation StartingLoc; + // This is the instruction we were querying about. + const Instruction *Inst; + // Set of visited Instructions for this query. + DenseSet Visited; + // Set of visited call accesses for this query. This is separated out because + // you can always cache and lookup the result of call queries (IE when IsCall + // == true) for every call in the chain. The calls have no AA location + // associated with them with them, and thus, no context dependence. + SmallPtrSet VisitedCalls; + // The MemoryAccess we actually got called with, used to test local domination + const MemoryAccess *OriginalAccess; + // The Datalayout for the module we started in + const DataLayout *DL; + + UpwardsMemoryQuery() + : SawBackedgePhi(false), IsCall(false), Inst(nullptr), + OriginalAccess(nullptr), DL(nullptr) {} +}; + +void CachingMemorySSAWalker::doCacheRemove(const MemoryAccess *M, + const UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) { + if (Q.IsCall) + CachedUpwardsClobberingCall.erase(M); + else + CachedUpwardsClobberingAccess.erase({M, Loc}); +} + +void CachingMemorySSAWalker::doCacheInsert(const MemoryAccess *M, + MemoryAccess *Result, + const UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) { + ++NumClobberCacheInserts; + if (Q.IsCall) + CachedUpwardsClobberingCall[M] = Result; + else + CachedUpwardsClobberingAccess[{M, Loc}] = Result; +} + +MemoryAccess *CachingMemorySSAWalker::doCacheLookup(const MemoryAccess *M, + const UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) { + ++NumClobberCacheLookups; + MemoryAccess *Result = nullptr; + + if (Q.IsCall) + Result = CachedUpwardsClobberingCall.lookup(M); + else + Result = CachedUpwardsClobberingAccess.lookup({M, Loc}); + + if (Result) + ++NumClobberCacheHits; + return Result; +} + +bool CachingMemorySSAWalker::instructionClobbersQuery( + const MemoryDef *MD, UpwardsMemoryQuery &Q, + const MemoryLocation &Loc) const { + Instruction *DefMemoryInst = MD->getMemoryInst(); + assert(DefMemoryInst && "Defining instruction not actually an instruction"); + + if (!Q.IsCall) + return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod; + + // If this is a call, mark it for caching + if (ImmutableCallSite(DefMemoryInst)) + Q.VisitedCalls.insert(MD); + ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst)); + return I != MRI_NoModRef; +} + +MemoryAccessPair CachingMemorySSAWalker::UpwardsDFSWalk( + MemoryAccess *StartingAccess, const MemoryLocation &Loc, + UpwardsMemoryQuery &Q, bool FollowingBackedge) { + MemoryAccess *ModifyingAccess = nullptr; + + auto DFI = df_begin(StartingAccess); + for (auto DFE = df_end(StartingAccess); DFI != DFE;) { + MemoryAccess *CurrAccess = *DFI; + if (MSSA->isLiveOnEntryDef(CurrAccess)) + return {CurrAccess, Loc}; + if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc)) + return {CacheResult, Loc}; + // If this is a MemoryDef, check whether it clobbers our current query. + if (auto *MD = dyn_cast(CurrAccess)) { + // If we hit the top, stop following this path. + // While we can do lookups, we can't sanely do inserts here unless we were + // to track everything we saw along the way, since we don't know where we + // will stop. + if (instructionClobbersQuery(MD, Q, Loc)) { + ModifyingAccess = CurrAccess; + break; + } + } + + // We need to know whether it is a phi so we can track backedges. + // Otherwise, walk all upward defs. + if (!isa(CurrAccess)) { + ++DFI; + continue; + } + + // Recurse on PHI nodes, since we need to change locations. + // TODO: Allow graphtraits on pairs, which would turn this whole function + // into a normal single depth first walk. + MemoryAccess *FirstDef = nullptr; + DFI = DFI.skipChildren(); + const MemoryAccessPair PHIPair(CurrAccess, Loc); + bool VisitedOnlyOne = true; + for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end(); + MPI != MPE; ++MPI) { + // Don't follow this path again if we've followed it once + if (!Q.Visited.insert(*MPI).second) + continue; + + bool Backedge = + !FollowingBackedge && + DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock()); + + MemoryAccessPair CurrentPair = + UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge); + // All the phi arguments should reach the same point if we can bypass + // this phi. The alternative is that they hit this phi node, which + // means we can skip this argument. + if (FirstDef && CurrentPair.first != PHIPair.first && + CurrentPair.first != FirstDef) { + ModifyingAccess = CurrAccess; + break; + } + + if (!FirstDef) + FirstDef = CurrentPair.first; + else + VisitedOnlyOne = false; + } + + // The above loop determines if all arguments of the phi node reach the + // same place. However we skip arguments that are cyclically dependent + // only on the value of this phi node. This means in some cases, we may + // only visit one argument of the phi node, and the above loop will + // happily say that all the arguments are the same. However, in that case, + // we still can't walk past the phi node, because that argument still + // kills the access unless we hit the top of the function when walking + // that argument. + if (VisitedOnlyOne && FirstDef && !MSSA->isLiveOnEntryDef(FirstDef)) + ModifyingAccess = CurrAccess; + } + + if (!ModifyingAccess) + return {MSSA->getLiveOnEntryDef(), Q.StartingLoc}; + + const BasicBlock *OriginalBlock = Q.OriginalAccess->getBlock(); + unsigned N = DFI.getPathLength(); + MemoryAccess *FinalAccess = ModifyingAccess; + for (; N != 0; --N) { + ModifyingAccess = DFI.getPath(N - 1); + BasicBlock *CurrBlock = ModifyingAccess->getBlock(); + if (!FollowingBackedge) + doCacheInsert(ModifyingAccess, FinalAccess, Q, Loc); + if (DT->dominates(CurrBlock, OriginalBlock) && + (CurrBlock != OriginalBlock || !FollowingBackedge || + MSSA->locallyDominates(ModifyingAccess, Q.OriginalAccess))) + break; + } + + // Cache everything else on the way back. The caller should cache + // Q.OriginalAccess for us. + for (; N != 0; --N) { + MemoryAccess *CacheAccess = DFI.getPath(N - 1); + doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc); + } + assert(Q.Visited.size() < 1000 && "Visited too much"); + + return {ModifyingAccess, Loc}; +} + +/// \brief Walk the use-def chains starting at \p MA and find +/// the MemoryAccess that actually clobbers Loc. +/// +/// \returns our clobbering memory access +MemoryAccess * +CachingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *StartingAccess, + UpwardsMemoryQuery &Q) { + return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first; +} + +MemoryAccess * +CachingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *StartingAccess, + MemoryLocation &Loc) { + if (isa(StartingAccess)) + return StartingAccess; + + auto *StartingUseOrDef = cast(StartingAccess); + if (MSSA->isLiveOnEntryDef(StartingUseOrDef)) + return StartingUseOrDef; + + Instruction *I = StartingUseOrDef->getMemoryInst(); + + // Conservatively, fences are always clobbers, so don't perform the walk if we + // hit a fence. + if (isa(I)) + return StartingUseOrDef; + + UpwardsMemoryQuery Q; + Q.OriginalAccess = StartingUseOrDef; + Q.StartingLoc = Loc; + Q.Inst = StartingUseOrDef->getMemoryInst(); + Q.IsCall = false; + Q.DL = &Q.Inst->getModule()->getDataLayout(); + + if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc)) + return CacheResult; + + // Unlike the other function, do not walk to the def of a def, because we are + // handed something we already believe is the clobbering access. + MemoryAccess *DefiningAccess = isa(StartingUseOrDef) + ? StartingUseOrDef->getDefiningAccess() + : StartingUseOrDef; + + MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q); + doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc); + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *StartingUseOrDef << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Clobber << "\n"); + return Clobber; +} + +MemoryAccess * +CachingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) { + // There should be no way to lookup an instruction and get a phi as the + // access, since we only map BB's to PHI's. So, this must be a use or def. + auto *StartingAccess = cast(MSSA->getMemoryAccess(I)); + + // We can't sanely do anything with a FenceInst, they conservatively + // clobber all memory, and have no locations to get pointers from to + // try to disambiguate + if (isa(I)) + return StartingAccess; + + UpwardsMemoryQuery Q; + Q.OriginalAccess = StartingAccess; + Q.IsCall = bool(ImmutableCallSite(I)); + if (!Q.IsCall) + Q.StartingLoc = MemoryLocation::get(I); + Q.Inst = I; + Q.DL = &Q.Inst->getModule()->getDataLayout(); + if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc)) + return CacheResult; + + // Start with the thing we already think clobbers this location + MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess(); + + // At this point, DefiningAccess may be the live on entry def. + // If it is, we will not get a better result. + if (MSSA->isLiveOnEntryDef(DefiningAccess)) + return DefiningAccess; + + MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q); + doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc); + // TODO: When this implementation is more mature, we may want to figure out + // what this additional caching buys us. It's most likely A Good Thing. + if (Q.IsCall) + for (const MemoryAccess *MA : Q.VisitedCalls) + doCacheInsert(MA, Result, Q, Q.StartingLoc); + + DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *DefiningAccess << "\n"); + DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is "); + DEBUG(dbgs() << *Result << "\n"); + + return Result; +} + +MemoryAccess * +DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) { + MemoryAccess *MA = MSSA->getMemoryAccess(I); + if (auto *Use = dyn_cast(MA)) + return Use->getDefiningAccess(); + return MA; +} + +MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess( + MemoryAccess *StartingAccess, MemoryLocation &) { + if (auto *Use = dyn_cast(StartingAccess)) + return Use->getDefiningAccess(); + return StartingAccess; +} +} Index: llvm/trunk/lib/Transforms/Utils/Utils.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/Utils.cpp +++ llvm/trunk/lib/Transforms/Utils/Utils.cpp @@ -32,6 +32,8 @@ initializeUnifyFunctionExitNodesPass(Registry); initializeInstSimplifierPass(Registry); initializeMetaRenamerPass(Registry); + initializeMemorySSALazyPass(Registry); + initializeMemorySSAPrinterPassPass(Registry); } /// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses. Index: llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/atomic-clobber.ll @@ -0,0 +1,17 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Ensures that atomic loads count as MemoryDefs + +define i32 @foo(i32* %a, i32* %b) { +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 4 + store i32 4, i32* %a, align 4 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: %1 = load atomic i32 + %1 = load atomic i32, i32* %b acquire, align 4 +; CHECK: MemoryUse(2) +; CHECK-NEXT: %2 = load i32 + %2 = load i32, i32* %a, align 4 + %3 = add i32 %1, %2 + ret i32 %3 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/cyclicphi.ll @@ -0,0 +1,33 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s + +%struct.hoge = type { i32, %struct.widget } +%struct.widget = type { i64 } + +define hidden void @quux(%struct.hoge *%f) align 2 { + %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0 + %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1 + %tmp25 = bitcast %struct.widget* %tmp24 to i64** + br label %bb26 + +bb26: ; preds = %bb77, %0 +; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2}) +; CHECK-NEXT: br i1 undef, label %bb68, label %bb77 + br i1 undef, label %bb68, label %bb77 + +bb68: ; preds = %bb26 +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: %tmp69 = load i64, i64* null, align 8 + %tmp69 = load i64, i64* null, align 8 +; CHECK: 1 = MemoryDef(3) +; CHECK-NEXT: store i64 %tmp69, i64* %tmp, align 8 + store i64 %tmp69, i64* %tmp, align 8 + br label %bb77 + +bb77: ; preds = %bb68, %bb26 +; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1}) +; CHECK: MemoryUse(2) +; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8 + %tmp78 = load i64*, i64** %tmp25, align 8 + %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef + br label %bb26 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/function-clobber.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/function-clobber.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/function-clobber.ll @@ -0,0 +1,26 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Ensuring that external functions without attributes are MemoryDefs + +@g = external global i32 +declare void @modifyG() + +define i32 @foo() { +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: %1 = load i32 + %1 = load i32, i32* @g + +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 4 + store i32 4, i32* @g, align 4 + +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: call void @modifyG() + call void @modifyG() + +; CHECK: MemoryUse(2) +; CHECK-NEXT: %2 = load i32 + %2 = load i32, i32* @g + %3 = add i32 %2, %1 + ret i32 %3 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/function-mem-attrs.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/function-mem-attrs.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/function-mem-attrs.ll @@ -0,0 +1,58 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Test that various function attributes give us sane results. + +@g = external global i32 + +declare void @readonlyFunction() readonly +declare void @noattrsFunction() + +define void @readonlyAttr() { +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 0 + store i32 0, i32* @g, align 4 + + %1 = alloca i32, align 4 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i32 0 + store i32 0, i32* %1, align 4 + +; CHECK: MemoryUse(1) +; CHECK-NEXT: call void @readonlyFunction() + call void @readonlyFunction() + +; CHECK: MemoryUse(1) +; CHECK-NEXT: call void @noattrsFunction() # +; Assume that #N is readonly + call void @noattrsFunction() readonly + + ; Sanity check that noattrsFunction is otherwise a MemoryDef +; CHECK: 3 = MemoryDef(2) +; CHECK-NEXT: call void @noattrsFunction() + call void @noattrsFunction() + ret void +} + +declare void @argMemOnly(i32*) argmemonly + +define void @inaccessableOnlyAttr() { + %1 = alloca i32, align 4 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 0 + store i32 0, i32* %1, align 4 + +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i32 0 + store i32 0, i32* @g, align 4 + +; CHECK: MemoryUse(1) +; CHECK-NEXT: call void @argMemOnly(i32* %1) # +; Assume that #N is readonly + call void @argMemOnly(i32* %1) readonly + +; CHECK: 3 = MemoryDef(2) +; CHECK-NEXT: call void @argMemOnly(i32* %1) + call void @argMemOnly(i32* %1) + + ret void +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/load-invariant.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/load-invariant.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/load-invariant.ll @@ -0,0 +1,24 @@ +; XFAIL: +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Invariant loads should be considered live on entry, because, once the +; location is known to be dereferenceable, the value can never change. +; +; Currently XFAILed because this optimization was held back from the initial +; commit. + +@g = external global i32 + +declare void @clobberAllTheThings() + +define i32 @foo() { +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: call void @clobberAllTheThings() + call void @clobberAllTheThings() +; CHECK: MemoryUse(liveOnEntry) +; CHECK-NEXT: %1 = load i32 + %1 = load i32, i32* @g, align 4, !invariant.load !0 + ret i32 %1 +} + +!0 = !{} Index: llvm/trunk/test/Transforms/Util/MemorySSA/many-dom-backedge.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/many-dom-backedge.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/many-dom-backedge.ll @@ -0,0 +1,76 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; many-dom.ll, with an added back-edge back into the switch. +; Because people love their gotos. + +declare i1 @getBool() readnone + +define i32 @foo(i32* %p) { +entry: + br label %loopbegin + +loopbegin: +; CHECK: 9 = MemoryPhi({entry,liveOnEntry},{sw.epilog,6}) +; CHECK-NEXT: %n = + %n = phi i32 [ 0, %entry ], [ %1, %sw.epilog ] + %m = alloca i32, align 4 + switch i32 %n, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: +; CHECK: 1 = MemoryDef(9) +; CHECK-NEXT: store i32 1 + store i32 1, i32* %m, align 4 + br label %sw.epilog + +sw.bb1: +; CHECK: 2 = MemoryDef(9) +; CHECK-NEXT: store i32 2 + store i32 2, i32* %m, align 4 + br label %sw.epilog + +sw.bb2: +; CHECK: 3 = MemoryDef(9) +; CHECK-NEXT: store i32 3 + store i32 3, i32* %m, align 4 + br label %sw.epilog + +sw.bb3: +; CHECK: 10 = MemoryPhi({loopbegin,9},{sw.almostexit,6}) +; CHECK: 4 = MemoryDef(10) +; CHECK-NEXT: store i32 4 + store i32 4, i32* %m, align 4 + br label %sw.epilog + +sw.default: +; CHECK: 5 = MemoryDef(9) +; CHECK-NEXT: store i32 5 + store i32 5, i32* %m, align 4 + br label %sw.epilog + +sw.epilog: +; CHECK: 8 = MemoryPhi({sw.default,5},{sw.bb3,4},{sw.bb,1},{sw.bb1,2},{sw.bb2,3}) +; CHECK-NEXT: MemoryUse(8) +; CHECK-NEXT: %0 = + %0 = load i32, i32* %m, align 4 +; CHECK: 6 = MemoryDef(8) +; CHECK-NEXT: %1 = + %1 = load volatile i32, i32* %p, align 4 + %2 = icmp eq i32 %0, %1 + br i1 %2, label %sw.almostexit, label %loopbegin + +sw.almostexit: + %3 = icmp eq i32 0, %1 + br i1 %3, label %exit, label %sw.bb3 + +exit: +; CHECK: 7 = MemoryDef(6) +; CHECK-NEXT: %4 = load volatile i32 + %4 = load volatile i32, i32* %p, align 4 + %5 = add i32 %4, %1 + ret i32 %5 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/many-doms.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/many-doms.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/many-doms.ll @@ -0,0 +1,66 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Testing many dominators, specifically from a switch statement in C. + +declare i1 @getBool() readnone + +define i32 @foo(i32* %p) { +entry: + br label %loopbegin + +loopbegin: +; CHECK: 8 = MemoryPhi({entry,liveOnEntry},{sw.epilog,6}) +; CHECK-NEXT: %n = + %n = phi i32 [ 0, %entry ], [ %1, %sw.epilog ] + %m = alloca i32, align 4 + switch i32 %n, label %sw.default [ + i32 0, label %sw.bb + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + ] + +sw.bb: +; CHECK: 1 = MemoryDef(8) +; CHECK-NEXT: store i32 1 + store i32 1, i32* %m, align 4 + br label %sw.epilog + +sw.bb1: +; CHECK: 2 = MemoryDef(8) +; CHECK-NEXT: store i32 2 + store i32 2, i32* %m, align 4 + br label %sw.epilog + +sw.bb2: +; CHECK: 3 = MemoryDef(8) +; CHECK-NEXT: store i32 3 + store i32 3, i32* %m, align 4 + br label %sw.epilog + +sw.bb3: +; CHECK: 4 = MemoryDef(8) +; CHECK-NEXT: store i32 4 + store i32 4, i32* %m, align 4 + br label %sw.epilog + +sw.default: +; CHECK: 5 = MemoryDef(8) +; CHECK-NEXT: store i32 5 + store i32 5, i32* %m, align 4 + br label %sw.epilog + +sw.epilog: +; CHECK: 7 = MemoryPhi({sw.default,5},{sw.bb,1},{sw.bb1,2},{sw.bb2,3},{sw.bb3,4}) +; CHECK-NEXT: MemoryUse(7) +; CHECK-NEXT: %0 = + %0 = load i32, i32* %m, align 4 +; CHECK: 6 = MemoryDef(7) +; CHECK-NEXT: %1 = + %1 = load volatile i32, i32* %p, align 4 + %2 = icmp eq i32 %0, %1 + br i1 %2, label %exit, label %loopbegin + +exit: + ret i32 %1 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/multi-edges.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/multi-edges.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/multi-edges.ll @@ -0,0 +1,31 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Makes sure we have a sane model if both successors of some block is the same +; block. + +define i32 @foo(i1 %a) { +entry: + %0 = alloca i32, align 4 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 4 + store i32 4, i32* %0 + br i1 %a, label %Loop.Body, label %Loop.End + +Loop.Body: +; CHECK: 4 = MemoryPhi({entry,1},{Loop.End,3}) +; CHECK-NEXT: 2 = MemoryDef(4) +; CHECK-NEXT: store i32 5 + store i32 5, i32* %0, align 4 + br i1 %a, label %Loop.End, label %Loop.End ; WhyDoWeEvenHaveThatLever.gif + +Loop.End: +; CHECK: 3 = MemoryPhi({entry,1},{Loop.Body,2},{Loop.Body,2}) +; CHECK-NEXT: MemoryUse(3) +; CHECK-NEXT: %1 = load + %1 = load i32, i32* %0, align 4 + %2 = icmp eq i32 5, %1 + br i1 %2, label %Ret, label %Loop.Body + +Ret: + ret i32 %1 +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll @@ -0,0 +1,72 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s + +; hfinkel's case +; [entry] +; | +; ..... +; (clobbering access - b) +; | +; .... ________________________________ +; \ / | +; (x) | +; ...... | +; | | +; | ______________________ | +; \ / | | +; (starting access) | | +; ... | | +; (clobbering access - a) | | +; ... | | +; | | | | +; | |_______________________| | +; | | +; |_________________________________| +; +; More specifically, one access, with multiple clobbering accesses. One of +; which strictly dominates the access, the other of which has a backedge + +; readnone so we don't have a 1:1 mapping of MemorySSA edges to Instructions. +declare void @doThingWithoutReading() readnone +declare i8 @getValue() readnone +declare i1 @getBool() readnone + +define hidden void @testcase(i8* %Arg) { +Entry: + call void @doThingWithoutReading() + %Val.Entry = call i8 @getValue() +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i8 %Val.Entry + store i8 %Val.Entry, i8* %Arg + call void @doThingWithoutReading() + br label %OuterLoop + +OuterLoop: +; CHECK: 5 = MemoryPhi({Entry,1},{InnerLoop.Tail,3}) +; CHECK-NEXT: %Val.Outer = + %Val.Outer = call i8 @getValue() +; CHECK: 2 = MemoryDef(5) +; CHECK-NEXT: store i8 %Val.Outer + store i8 %Val.Outer, i8* %Arg + call void @doThingWithoutReading() + br label %InnerLoop + +InnerLoop: +; CHECK: 4 = MemoryPhi({OuterLoop,2},{InnerLoop,3}) +; CHECK-NEXT: ; MemoryUse(4) +; CHECK-NEXT: %StartingAccess = load + %StartingAccess = load i8, i8* %Arg, align 4 + %Val.Inner = call i8 @getValue() +; CHECK: 3 = MemoryDef(4) +; CHECK-NEXT: store i8 %Val.Inner + store i8 %Val.Inner, i8* %Arg + call void @doThingWithoutReading() + %KeepGoing = call i1 @getBool() + br i1 %KeepGoing, label %InnerLoop.Tail, label %InnerLoop + +InnerLoop.Tail: + %KeepGoing.Tail = call i1 @getBool() + br i1 %KeepGoing.Tail, label %End, label %OuterLoop + +End: + ret void +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/no-disconnected.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/no-disconnected.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/no-disconnected.ll @@ -0,0 +1,42 @@ +; RUN: opt -basicaa -print-memoryssa -analyze -verify-memoryssa < %s 2>&1 | FileCheck %s +; +; This test ensures we don't end up with multiple reaching defs for a single +; use/phi edge If we were to optimize defs, we would end up with 2= +; MemoryDef(liveOnEntry) and 4 = MemoryDef(liveOnEntry) Both would mean both +; 1,2, and 3,4 would reach the phi node. Because the phi node can only have one +; entry on each edge, it would choose 2, 4 and disconnect 1 and 3 completely +; from the SSA graph, even though they are not dead + +define void @sink_store(i32 %index, i32* %foo, i32* %bar) { +entry: + %cmp = trunc i32 %index to i1 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 %index, i32* %foo, align 4 + store i32 %index, i32* %foo, align 4 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store i32 %index, i32* %bar, align 4 + store i32 %index, i32* %bar, align 4 + br label %if.end + +if.else: ; preds = %entry +; CHECK: 3 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store i32 %index, i32* %foo, align 4 + store i32 %index, i32* %foo, align 4 +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: store i32 %index, i32* %bar, align 4 + store i32 %index, i32* %bar, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then +; CHECK: 5 = MemoryPhi({if.then,2},{if.else,4}) +; CHECK: MemoryUse(5) +; CHECK-NEXT: %c = load i32, i32* %foo + %c = load i32, i32* %foo +; CHECK: MemoryUse(5) +; CHECK-NEXT: %d = load i32, i32* %bar + %d = load i32, i32* %bar + ret void +} Index: llvm/trunk/test/Transforms/Util/MemorySSA/optimize-use.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/optimize-use.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/optimize-use.ll @@ -0,0 +1,36 @@ +; RUN: opt -basicaa -print-memoryssa -analyze -verify-memoryssa < %s 2>&1 | FileCheck %s + +; Function Attrs: ssp uwtable +define i32 @main() { +entry: +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: %call = call noalias i8* @_Znwm(i64 4) + %call = call noalias i8* @_Znwm(i64 4) + %0 = bitcast i8* %call to i32* +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: %call1 = call noalias i8* @_Znwm(i64 4) + %call1 = call noalias i8* @_Znwm(i64 4) + %1 = bitcast i8* %call1 to i32* +; CHECK: 3 = MemoryDef(2) +; CHECK-NEXT: store i32 5, i32* %0, align 4 + store i32 5, i32* %0, align 4 +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: store i32 7, i32* %1, align 4 + store i32 7, i32* %1, align 4 +; CHECK: MemoryUse(3) +; CHECK-NEXT: %2 = load i32, i32* %0, align 4 + %2 = load i32, i32* %0, align 4 +; CHECK: MemoryUse(4) +; CHECK-NEXT: %3 = load i32, i32* %1, align 4 + %3 = load i32, i32* %1, align 4 +; CHECK: MemoryUse(3) +; CHECK-NEXT: %4 = load i32, i32* %0, align 4 + %4 = load i32, i32* %0, align 4 +; CHECK: MemoryUse(4) +; CHECK-NEXT: %5 = load i32, i32* %1, align 4 + %5 = load i32, i32* %1, align 4 + %add = add nsw i32 %3, %5 + ret i32 %add +} + +declare noalias i8* @_Znwm(i64) Index: llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll =================================================================== --- llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll +++ llvm/trunk/test/Transforms/Util/MemorySSA/volatile-clobber.ll @@ -0,0 +1,21 @@ +; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s +; +; Ensures that volatile stores/loads count as MemoryDefs + +define i32 @foo() { + %1 = alloca i32, align 4 +; CHECK: 1 = MemoryDef(liveOnEntry) +; CHECK-NEXT: store volatile i32 4 + store volatile i32 4, i32* %1, align 4 +; CHECK: 2 = MemoryDef(1) +; CHECK-NEXT: store volatile i32 8 + store volatile i32 8, i32* %1, align 4 +; CHECK: 3 = MemoryDef(2) +; CHECK-NEXT: %2 = load volatile i32 + %2 = load volatile i32, i32* %1, align 4 +; CHECK: 4 = MemoryDef(3) +; CHECK-NEXT: %3 = load volatile i32 + %3 = load volatile i32, i32* %1, align 4 + %4 = add i32 %3, %2 + ret i32 %4 +}