Index: llvm/include/llvm/Support/GenericDomTree.h =================================================================== --- llvm/include/llvm/Support/GenericDomTree.h +++ llvm/include/llvm/Support/GenericDomTree.h @@ -721,11 +721,12 @@ llvm::make_unique>(BB, IDomNode))).get(); } - NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } - void addRoot(NodeT *BB) { this->Roots.push_back(BB); } public: + + NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } + /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking /// dominator tree in dfs order. void updateDFSNumbers() const { Index: llvm/include/llvm/Support/GenericDomTreeConstruction.h =================================================================== --- llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -269,7 +269,7 @@ } // Free temporary memory used to construct idom's - DT.IDoms.clear(); + //DT.IDoms.clear(); DT.Info.clear(); DT.Vertex.clear(); DT.Vertex.shrink_to_fit(); Index: llvm/include/llvm/Transforms/Utils/MemorySSA.h =================================================================== --- llvm/include/llvm/Transforms/Utils/MemorySSA.h +++ llvm/include/llvm/Transforms/Utils/MemorySSA.h @@ -245,6 +245,7 @@ static inline bool classof(const Value *MA) { return MA->getValueID() == MemoryUseVal || MA->getValueID() == MemoryDefVal; } + void setDefiningAccess(MemoryAccess *DMA) { setOperand(0, DMA); } protected: friend class MemorySSA; @@ -255,7 +256,6 @@ setDefiningAccess(DMA); } - void setDefiningAccess(MemoryAccess *DMA) { setOperand(0, DMA); } private: Instruction *MemoryInst; @@ -612,6 +612,7 @@ /// Used in various insertion functions to specify whether we are talking /// about the beginning or end of a block. 
enum InsertionPlace { Beginning, End }; + MemoryPhi *createMemoryPhi(BasicBlock *BB); protected: // Used by Memory SSA annotater, dumpers, and wrapper pass @@ -671,7 +672,6 @@ void computeDomLevels(DenseMap &DomLevels); void markUnreachableAsLiveOnEntry(BasicBlock *BB); bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const; - MemoryPhi *createMemoryPhi(BasicBlock *BB); MemoryUseOrDef *createNewAccess(Instruction *); MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace); void placePHINodes(const SmallPtrSetImpl &, Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -137,7 +137,7 @@ "(default = 75)")); static cl::opt EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, + "enable-gvn-hoist", cl::init(true), cl::Hidden, cl::desc("Enable the GVN hoisting pass")); static cl::opt @@ -477,6 +477,10 @@ MPM.add(Inliner); Inliner = nullptr; } + // GVNHoist + if(EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (!DisableUnitAtATime) MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -17,14 +17,57 @@ // is disabled in the following cases. // 1. Scalars across calls. // 2. geps when corresponding load/store cannot be hoisted. +// +// TODO: +// For -Oz scalars are always safe to hoist => NO. +// For -O2/-O3 hoist only when the live range improves or remains the same. +// If we haven't computed dominator tree levels, do so now. +// For sink operation anything after hoist barrier is okay to sink +// but nothing before the hoist barrier. See Danny's patch on HoistBarriers. 
+// +// Find sink opportunities: sink instructions to a common post dominator when +// they compute the same value and hoisting isn't profitable because that will +// increase the liveness. In this case sinking may decrease the liveness when +// the instruction can be delayed. But do not delay so much that latency of the +// operation takes over. Like divide takes 10 cycles maybe, and if the divide is +// sunk into a BB where it is used then the result of divide may take long time +// anyways. So an estimated latency of operation should be computed and the +// instruction should be sunk only till the point when result may be readily +// available for the user of the instruction. +// +// Sinking store may not always be beneficial: sinking frees up a register so +// improves register allocation the stored result may be loaded again, in that +// case it is good to store soon (??) +// +// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores +// in this case because it works on two instructions at a time. 
+// entry: +// switch i32 %c1, label %exit1 [ +// i32 0, label %sw0 +// i32 1, label %sw1 +// ] +// +// sw0: +// store i32 1, i32* @G +// br label %exit +// +// sw1: +// store i32 1, i32* @G +// br label %exit +// +// exit1: +// store i32 1, i32* @G +// ret void +// exit: +// ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MemorySSA.h" #include "llvm/Transforms/Utils/MemorySSAUpdater.h" @@ -34,6 +77,7 @@ #define DEBUG_TYPE "gvn-hoist" STATISTIC(NumHoisted, "Number of instructions hoisted"); +STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumRemoved, "Number of instructions removed"); STATISTIC(NumLoadsHoisted, "Number of loads hoisted"); STATISTIC(NumLoadsRemoved, "Number of loads removed"); @@ -41,6 +85,7 @@ STATISTIC(NumStoresRemoved, "Number of stores removed"); STATISTIC(NumCallsHoisted, "Number of calls hoisted"); STATISTIC(NumCallsRemoved, "Number of calls removed"); +//STATISTIC(NumBarriers, "Number of barriers"); static cl::opt MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), @@ -60,7 +105,13 @@ MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10), cl::desc("Maximum length of dependent chains to hoist " "(default = 10, unlimited = -1)")); +static cl::opt + CheckHoistProfitability("gvn-hoist-check-profitability", cl::Hidden, cl::init(true), + cl::desc("Check for profitability (reducing register pressure)")); +static cl::opt + CheckSinkProfitability("gvn-sink-check-profitability", cl::Hidden, cl::init(true), + cl::desc("Check for profitability (reducing register pressure)")); namespace llvm { // Provides a sorting function based on the execution order
of two instructions. @@ -73,13 +124,6 @@ // Returns true when A executes before B. bool operator()(const Instruction *A, const Instruction *B) const { - // FIXME: libc++ has a std::sort() algorithm that will call the compare - // function on the same element. Once PR20837 is fixed and some more years - // pass by and all the buildbots have moved to a corrected std::sort(), - // enable the following assert: - // - // assert(A != B); - const BasicBlock *BA = A->getParent(); const BasicBlock *BB = B->getParent(); unsigned ADFS, BDFS; @@ -114,6 +158,8 @@ VNtoScalars[{V, InvalidVN}].push_back(I); } + void clear() { VNtoScalars.clear(); } + const VNtoInsns &getVNTable() const { return VNtoScalars; } }; @@ -130,6 +176,8 @@ } } + void clear() { VNtoLoads.clear(); } + const VNtoInsns &getVNTable() const { return VNtoLoads; } }; @@ -149,6 +197,8 @@ VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store); } + void clear() { VNtoStores.clear(); } + const VNtoInsns &getVNTable() const { return VNtoStores; } }; @@ -175,6 +225,12 @@ VNtoCallsStores[Entry].push_back(Call); } + void clear() { + VNtoCallsScalars.clear(); + VNtoCallsLoads.clear(); + VNtoCallsStores.clear(); + } + const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } @@ -185,6 +241,13 @@ typedef DenseMap BBSideEffectsSet; typedef SmallVector SmallVecInsn; typedef SmallVectorImpl SmallVecImplInsn; +typedef SmallSet SmallSetBB; +typedef DenseMap MergeSetT; +typedef SmallVector SmallVecBB; +typedef SmallVecBB BBLevelKeyT; +typedef std::map BBLevelT; +typedef DenseMap DomLevelsT; +typedef std::pair EdgeT; static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { static const unsigned KnownIDs[] = { @@ -195,55 +258,123 @@ combineMetadata(ReplInst, I, KnownIDs); } +void printBBLevels(const BBLevelT &BBLevels) { + for (const std::pair &P : BBLevels) { + dbgs() << "\nLevel: " << P.first << "\n"; + for (const 
BasicBlock *BB : P.second) + dbgs() << *BB << "\n"; + } +} + +void printMergeSet(const MergeSetT &M) { + // For printing in a deterministic order. + typedef std::set SetConstBB; + std::map PrintM; + for (const std::pair &P : M) { + for (const BasicBlock *BB : P.second) + PrintM[P.first].insert(BB); + } + for (const std::pair &P : PrintM) { + dbgs() << "\nMergeSet of: " << P.first->getName() << ": "; + for (const BasicBlock *BB : P.second) + dbgs() << BB->getName() << ", "; + } +} + +void printJEdges(const DenseSet &Edges) { + // For printing in a deterministic order. + std::set PrintE(Edges.begin(), Edges.end()); + + for (const EdgeT &E : PrintE) + dbgs() << "\nFound a JEdge: " << E.first->getName() << " -> " + << E.second->getName(); +} + +void printSmallSet(SmallSetBB &S) { + dbgs() << "\nPrinting SmallSet: "; + for (const auto &BB : S) + dbgs() << BB->getName() << ","; +} + +static inline void updateLocalStats(const Instruction* Repl, unsigned &NI, unsigned &NL, unsigned &NS, unsigned &NC) { + if (isa(Repl)) + ++NL; + else if (isa(Repl)) + ++NS; + else if (isa(Repl)) + ++NC; + else // Scalar + ++NI; +} + +static inline void updateGlobalStats(unsigned NI, unsigned NL, unsigned NS, unsigned NC, unsigned NR) { + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; +} + // This pass hoists common computations across branches sharing common // dominator. The primary goal is to reduce the code size, and in some // cases reduce critical path (by exposing more ILP). class GVNHoist { public: + // Each element of a hoisting list contains the basic block where to hoist and + // a list of instructions to be hoisted. 
+ typedef std::pair HoistingPointInfo; + typedef SmallVector HoistingPointList; + GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), AA(AA), MD(MD), MSSA(MSSA), MSSAUpdater(make_unique(MSSA)), - HoistingGeps(false), - HoistedCtr(0) - { } - - bool run(Function &F) { - VN.setDomTree(DT); - VN.setAliasAnalysis(AA); - VN.setMemDep(MD); - bool Res = false; - // Perform DFS Numbering of instructions. - unsigned BBI = 0; - for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { - DFSNumber[BB] = ++BBI; - unsigned I = 0; - for (auto &Inst : *BB) - DFSNumber[&Inst] = ++I; - } + HoistedCtr(0), HoistingGeps(false) { + clearVNTables(); + } - int ChainLength = 0; + void clearVNTables() { + II.clear(); + LI.clear(); + SI.clear(); + CI.clear(); + } - // FIXME: use lazy evaluation of VN to avoid the fix-point computation. - while (1) { - if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) - return Res; + // DomLevels maps from BB -> its depth from root. + // JEdges only contain the J edges as D edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the Basic Blocks at that level. + // DJ Graph is described in "Sreedhar, Vugranam C. Efficient program analysis + // using DJ graphs. McGill University, 1996". + void constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); + + // Return true if S1 is a subset of S2. + bool isSubset(const SmallSetBB &S1, const SmallSetBB &S2) { + if (S1.size() > S2.size()) + return false; + for (BasicBlock *BB : S1) { + if (!S2.count(BB)) + return false; + } + return true; + } - auto HoistStat = hoistExpressions(F); - if (HoistStat.first + HoistStat.second == 0) - return Res; + // DomLevels maps BB to its depth from root. + // JEdges only contain the J-edges as D-edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the BBs at that level. + // BILARDI, G. AND PINGALI, K. 2003. 
Algorithms for computing the static + // single assignment form. J. ACM 50, 3 (May), 375–425. + bool constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); - if (HoistStat.second > 0) - // To address a limitation of the current GVN, we need to rerun the - // hoisting after we hoisted loads or stores in order to be able to - // hoist all scalars dependent on the hoisted ld/st. - VN.clear(); + // Returns true if the \p Op is the last use at I. + // TODO: Find O(1) algorithm for this. + const Instruction *lastUser(const Instruction *I, const Value *Val) const; - Res = true; - } + // Returns true if the \p Op is live-out from \p BB. + bool isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const; - return Res; - } + bool run(Function &F); private: GVN::ValueTable VN; @@ -252,31 +383,28 @@ MemoryDependenceResults *MD; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; - const bool HoistingGeps; DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; + MergeSetT MergeSet; + DenseSet HoistBarrier; + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; int HoistedCtr; + const bool HoistingGeps; + //unsigned NumFuncArgs; + //DenseMap Rank; enum InsKind { Unknown, Scalar, Load, Store }; - // Return true when there are exception handling in BB. - bool hasEH(const BasicBlock *BB) { - auto It = BBSideEffects.find(BB); - if (It != BBSideEffects.end()) - return It->second; - - if (BB->isEHPad() || BB->hasAddressTaken()) { - BBSideEffects[BB] = true; - return true; - } - - if (BB->getTerminator()->mayThrow()) { - BBSideEffects[BB] = true; - return true; - } - - BBSideEffects[BB] = false; - return false; + // Return true when I1 appears before I2 in the instructions of BB. 
+ bool firstInBB(const Instruction *I1, const Instruction *I2) const { + assert(I1->getParent() == I2->getParent()); + unsigned I1DFS = DFSNumber.lookup(I1); + unsigned I2DFS = DFSNumber.lookup(I2); + assert(I1DFS && I2DFS); + return I1DFS < I2DFS; } // Return true when a successor of BB dominates A. @@ -288,693 +416,1333 @@ return false; } + bool hoistCandidate(const User *U) const { + if (!VN.exists(const_cast(U))) // Only for scalars + return false; + unsigned V = VN.lookup(const_cast(U)); + + // Multiple scalars with same VN have very high chance of being hoisted. + if (II.getVNTable().count({V, InvalidVN}) > 1) + return true; + + return false; + } + + // Return true when there are exception handling in BB. + bool hasEH(const BasicBlock *BB); + + // Return true when there are exception handling or loads of memory Def + // between Def and NewPt. This function is only called for stores: Def is + // the MemoryDef of the store to be hoisted. + // + // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and + // return true when the counter NBBsOnAllPaths reaches 0, except when it is + // initialized to -1 which is unlimited. + bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, + int &NBBsOnAllPaths); + + // Return true when there are exception handling between HoistPt and BB. + // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and + // return true when the counter NBBsOnAllPaths reaches 0, except when it is + // initialized to -1 which is unlimited. + bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, + int &NBBsOnAllPaths); + // Return true when all paths from HoistBB to the end of the function pass // through one of the blocks in WL. bool hoistingFromAllPaths(const BasicBlock *HoistBB, - SmallPtrSetImpl &WL) { + SmallPtrSetImpl &WL); - // Copy WL as the loop will remove elements from it. - SmallPtrSet WorkList(WL.begin(), WL.end()); + // Return true when there are memory uses of Def in BB.
+ bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB); - for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { - // There exists a path from HoistBB to the exit of the function if we are - // still iterating in DF traversal and we removed all instructions from - // the work list. - if (WorkList.empty()) - return false; + // Return true when it is safe to hoist scalar instructions from all blocks in + // WL to HoistBB. TODO: Hoisting scalars past a call only when the + // interference does not increase. + bool safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths); - const BasicBlock *BB = *It; - if (WorkList.erase(BB)) { - // Stop DFS traversal when BB is in the work list. - It.skipChildren(); - continue; - } + // Return true when it is safe to hoist a memory load or store U from OldPt + // to NewPt. + bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, + MemoryUseOrDef *MA, InsKind K, int &NBBsOnAllPaths); - // Check for end of function, calls that do not return, etc. - if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator())) - return false; + bool safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, const Instruction *Insn, + MemoryUseOrDef *MA, GVNHoist::InsKind K, + int &NumBBsOnAllPaths, const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, const BasicBlock *InsnBB, + SmallPtrSetImpl &WL); - // When reaching the back-edge of a loop, there may be a path through the - // loop that does not pass through B or C before exiting the loop. - if (successorDominate(BB, HoistBB)) - return false; + // Return true when it is profitable to hoist I1. + bool profitableToHoist(Instruction *I) const; - // Increment DFS traversal when not skipping children. - ++It; - } + // Return true when it is profitable to sink I1.
+ bool profitableToSink(Instruction *I) const; + + // Partition InstructionsToHoist into a set of candidates which can share a + // common hoisting point. The partitions are collected in HPL. IsScalar is + // true when the instructions in InstructionsToHoist are scalars. IsLoad is + // true when the InstructionsToHoist are loads, false when they are stores. + void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + HoistingPointList &HPL, InsKind K); + + bool safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K); + + // Initialize HPL from Map. + void findHoistableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + /*void getRanks(Function &F); + + void sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const;*/ + + // Find sinkable instructions. + void findSinkableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + // Return true when all operands of Instr are available at insertion point + // HoistPt. When limiting the number of hoisted expressions, one could hoist + // a load without hoisting its access function. So before hoisting any + // expression, make sure that all its operands are available at insert point. + bool allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) + return false; return true; } - /* Return true when I1 appears before I2 in the instructions of BB. */ - bool firstInBB(const Instruction *I1, const Instruction *I2) { - assert(I1->getParent() == I2->getParent()); - unsigned I1DFS = DFSNumber.lookup(I1); - unsigned I2DFS = DFSNumber.lookup(I2); - assert(I1DFS && I2DFS); - return I1DFS < I2DFS; + // Same as allOperandsAvailable with recursive check for GEP operands. + bool allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const; + + // Make all operands of the GEP available. 
+ void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const; + + // In the case Repl is a load or a store, we make all their GEPs + // available: GEPs are not hoisted by default to avoid the address + // computations to be hoisted without the associated load or store. + bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const; + + void setAlignment(Instruction *I, Instruction *Repl); + // Remove and rename all instructions other than Repl. + void removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl); + void removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc); + void removeMPhi(MemoryAccess *NewMemAcc); + std::pair hoist(HoistingPointList &HPL); + std::pair sink(HoistingPointList &HPL); + bool getVN(Instruction *I); + void barriersAndVNs(Function &F, bool Sinking); + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair hoistExpressions(Function &F); + std::pair sinkExpressions(Function &F); +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); } - // Return true when there are memory uses of Def in BB. 
- bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, - const BasicBlock *BB) { - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); - if (!Acc) + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + auto &DT = getAnalysis().getDomTree(); + auto &AA = getAnalysis().getAAResults(); + auto &MD = getAnalysis().getMemDep(); + auto &MSSA = getAnalysis().getMSSA(); - Instruction *OldPt = Def->getMemoryInst(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *NewBB = NewPt->getParent(); - bool ReachedNewPt = false; + GVNHoist G(&DT, &AA, &MD, &MSSA); + return G.run(F); + } - for (const MemoryAccess &MA : *Acc) - if (const MemoryUse *MU = dyn_cast(&MA)) { - Instruction *Insn = MU->getMemoryInst(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } +}; - // Do not check whether MU aliases Def when MU occurs after OldPt. - if (BB == OldBB && firstInBB(OldPt, Insn)) - break; +bool GVNHoist::hasEH(const BasicBlock *BB) { + auto It = BBSideEffects.find(BB); + if (It != BBSideEffects.end()) + return It->second; - // Do not check whether MU aliases Def when MU occurs before NewPt. - if (BB == NewBB) { - if (!ReachedNewPt) { - if (firstInBB(Insn, NewPt)) - continue; - ReachedNewPt = true; - } - } - if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) - return true; - } + if (BB->isEHPad() || BB->hasAddressTaken()) { + BBSideEffects[BB] = true; + return true; + } - return false; + if (BB->getTerminator()->mayThrow()) { + BBSideEffects[BB] = true; + return true; } - // Return true when there are exception handling or loads of memory Def - // between Def and NewPt. This function is only called for stores: Def is - // the MemoryDef of the store to be hoisted. 
+ BBSideEffects[BB] = false; + return false; +} - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaces 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, - int &NBBsOnAllPaths) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = Def->getBlock(); - assert(DT->dominates(NewBB, OldBB) && "invalid path"); - assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && - "def does not dominate new hoisting point"); - - // Walk all basic blocks reachable in depth-first iteration on the inverse - // CFG from OldBB to NewBB. These blocks are all the blocks that may be - // executed between the execution of NewBB and OldBB. Hoisting an expression - // from OldBB into NewBB has to be safe on all execution paths. - for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { - if (*I == NewBB) { - // Stop traversal when reaching HoistPt. - I.skipChildren(); - continue; - } +bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, + int &NBBsOnAllPaths) { + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = Def->getBlock(); + assert(DT->dominates(NewBB, OldBB) && "invalid path"); + assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && + "def does not dominate new hoisting point"); + + // Walk all basic blocks reachable in depth-first iteration on the inverse + // CFG from OldBB to NewBB. These blocks are all the blocks that may be + // executed between the execution of NewBB and OldBB. Hoisting an expression + // from OldBB into NewBB has to be safe on all execution paths. + for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == NewBB) { + // Stop traversal when reaching HoistPt. + I.skipChildren(); + continue; + } - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - // Check that we do not move a store past loads. - if (hasMemoryUse(NewPt, Def, *I)) - return true; + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != OldBB) && HoistBarrier.count(BB)) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Check that we do not move a store past loads. + if (hasMemoryUse(NewPt, Def, BB)) + return true; - ++I; - } + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; - return false; + ++I; } - // Return true when there are exception handling between HoistPt and BB. - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaches 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB, - int &NBBsOnAllPaths) { - assert(DT->dominates(HoistPt, BB) && "Invalid path"); - - // Walk all basic blocks reachable in depth-first iteration on - // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the - // blocks that may be executed between the execution of NewHoistPt and - // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe - // on all execution paths. - for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) { - if (*I == HoistPt) { - // Stop traversal when reaching NewHoistPt. - I.skipChildren(); - continue; - } + return false; +} - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; +bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, + int &NBBsOnAllPaths) { + assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); + + // Walk all basic blocks reachable in depth-first iteration on + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the + // blocks that may be executed between the execution of NewHoistPt and + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe + // on all execution paths. + for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == HoistPt) { + // Stop traversal when reaching NewHoistPt. + I.skipChildren(); + continue; + } - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - ++I; - } + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != SrcBB) && HoistBarrier.count(BB)) + return true; - return false; + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; } - // Return true when it is safe to hoist a memory load or store U from OldPt - // to NewPt. - bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, - MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { + return false; +} - // In place hoisting is safe. 
- if (NewPt == OldPt) - return true; +bool GVNHoist::hoistingFromAllPaths(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *UBB = U->getBlock(); + // Copy WL as the loop will remove elements from it. + SmallPtrSet WorkList(WL.begin(), WL.end()); - // Check for dependences on the Memory SSA. - MemoryAccess *D = U->getDefiningAccess(); - BasicBlock *DBB = D->getBlock(); - if (DT->properlyDominates(NewBB, DBB)) - // Cannot move the load or store to NewBB above its definition in DBB. + for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { + // There exists a path from HoistBB to the exit of the function if we are + // still iterating in DF traversal and we removed all instructions from + // the work list. + if (WorkList.empty()) return false; - if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) - if (auto *UD = dyn_cast(D)) - if (firstInBB(NewPt, UD->getMemoryInst())) - // Cannot move the load or store to NewPt above its definition in D. - return false; + const BasicBlock *BB = *It; + if (WorkList.erase(BB)) { + // Stop DFS traversal when BB is in the work list. + It.skipChildren(); + continue; + } - // Check for unsafe hoistings due to side effects. - if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) - return false; - } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + // We reached the leaf Basic Block => not all paths have this instruction. + if (!BB->getTerminator()->getNumSuccessors()) return false; - if (UBB == NewBB) { - if (DT->properlyDominates(DBB, NewBB)) + // When reaching the back-edge of a loop, there may be a path through the + // loop that does not pass through B or C before exiting the loop. + if (successorDominate(BB, HoistBB)) + return false; + + // Increment DFS traversal when not skipping children. 
+ ++It; + } + + return true; +} + +bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB) { + const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); + if (!Acc) + return false; + + Instruction *OldPt = Def->getMemoryInst(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *NewBB = NewPt->getParent(); + bool ReachedNewPt = false; + + for (const MemoryAccess &MA : *Acc) + if (const MemoryUse *MU = dyn_cast(&MA)) { + Instruction *Insn = MU->getMemoryInst(); + + // Do not check whether MU aliases Def when MU occurs after OldPt. + if (BB == OldBB && firstInBB(OldPt, Insn)) + break; + + // Do not check whether MU aliases Def when MU occurs before NewPt. + if (BB == NewBB) { + if (!ReachedNewPt) { + if (firstInBB(Insn, NewPt)) + continue; + ReachedNewPt = true; + } + } + if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) return true; - assert(UBB == DBB); - assert(MSSA->locallyDominates(D, U)); } - // No side effects: it is safe to hoist. - return true; - } + return false; +} - // Return true when it is safe to hoist scalar instructions from all blocks in - // WL to HoistBB. - bool safeToHoistScalar(const BasicBlock *HoistBB, - SmallPtrSetImpl &WL, - int &NBBsOnAllPaths) { - // Check that the hoisted expression is needed on all paths. - if (!hoistingFromAllPaths(HoistBB, WL)) +bool GVNHoist::safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths) { + // Check that the hoisted expression is needed on all paths. + if (!hoistingFromAllPaths(HoistBB, WL)) + return false; + + for (const BasicBlock *BB : WL) + if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) return false; - for (const BasicBlock *BB : WL) - if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) - return false; + return true; +} + +bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt, + const Instruction *OldPt, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NBBsOnAllPaths) { + // In place hoisting is safe. 
+ if (NewPt == OldPt) return true; - } - // Each element of a hoisting list contains the basic block where to hoist and - // a list of instructions to be hoisted. - typedef std::pair HoistingPointInfo; - typedef SmallVector HoistingPointList; + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *MABB = MA->getBlock(); - // Partition InstructionsToHoist into a set of candidates which can share a - // common hoisting point. The partitions are collected in HPL. IsScalar is - // true when the instructions in InstructionsToHoist are scalars. IsLoad is - // true when the InstructionsToHoist are loads, false when they are stores. - void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, - HoistingPointList &HPL, InsKind K) { - // No need to sort for two instructions. - if (InstructionsToHoist.size() > 2) { - SortByDFSIn Pred(DFSNumber); - std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); - } + // Check for dependences on the Memory SSA. + MemoryAccess *Def = MA->getDefiningAccess(); + BasicBlock *DefBB = Def->getBlock(); + if (DT->properlyDominates(NewBB, DefBB)) + // Cannot move the load or store to NewBB above its definition in DBB. + return false; - int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + if (NewBB == DefBB && !MSSA->isLiveOnEntryDef(Def)) + if (auto *UD = dyn_cast(Def)) + if (firstInBB(NewPt, UD->getMemoryInst())) + // Cannot move the load or store to NewPt above its definition in D. 
+ return false; - SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); - SmallVecImplInsn::iterator Start = II; - Instruction *HoistPt = *II; - BasicBlock *HoistBB = HoistPt->getParent(); - MemoryUseOrDef *UD; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(HoistPt); - - for (++II; II != InstructionsToHoist.end(); ++II) { - Instruction *Insn = *II; - BasicBlock *BB = Insn->getParent(); - BasicBlock *NewHoistBB; - Instruction *NewHoistPt; - - if (BB == HoistBB) { // Both are in the same Basic Block. - NewHoistBB = HoistBB; - NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; - } else { - // If the hoisting point contains one of the instructions, - // then hoist there, otherwise hoist before the terminator. - NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB); - if (NewHoistBB == BB) - NewHoistPt = Insn; - else if (NewHoistBB == HoistBB) - NewHoistPt = HoistPt; - else - NewHoistPt = NewHoistBB->getTerminator(); - } + // Check for unsafe hoistings due to side effects. + if (K == InsKind::Store) { + if (hasEHOrLoadsOnPath(NewPt, dyn_cast(MA), NBBsOnAllPaths)) + return false; + } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + return false; - SmallPtrSet WL; - WL.insert(HoistBB); - WL.insert(BB); + if (MABB == NewBB) { + if (DT->properlyDominates(DefBB, NewBB)) + return true; + assert(MABB == DefBB); + assert(MSSA->locallyDominates(Def, MA)); + } - if (K == InsKind::Scalar) { - if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; - } - } else { - // When NewBB already contains an instruction to be hoisted, the - // expression is needed on all paths. - // Check that the hoisted expression is needed on all paths: it is - // unsafe to hoist loads to a place where there may be a path not - // loading from the same address: for instance there may be a branch on - // which the address of the load may not be initialized. 
- if ((HoistBB == NewHoistBB || BB == NewHoistBB || - hoistingFromAllPaths(NewHoistBB, WL)) && - // Also check that it is safe to move the load or store from HoistPt - // to NewHoistPt, and from Insn to NewHoistPt. - safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NumBBsOnAllPaths) && - safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), - K, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; + // No side effects: it is safe to hoist. + return true; +} + + +bool GVNHoist::safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, + const Instruction *Insn, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NumBBsOnAllPaths, + const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, + const BasicBlock *InsnBB, + SmallPtrSetImpl &WL) { + return (HoistPtBB == NewHoistBB || InsnBB == NewHoistBB || + hoistingFromAllPaths(NewHoistBB, WL)) && + // Also check that it is safe to move the load or store from HoistPt + // to NewHoistPt, and from Insn to NewHoistPt. Note that HoistPt may + // not be the instruction to be hoisted, it is a transient placeholder + // to find the farthest hoisting point when >2 hoistable candidates + // can be hoisted to a common dominator. + safeToHoistLdSt(NewHoistPt, HoistPt, MA, K, NumBBsOnAllPaths) && + safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), K, + NumBBsOnAllPaths); +} + +bool GVNHoist::profitableToHoist(Instruction *I) const { + if (!CheckHoistProfitability) + return true; + // For -O3/-O2 hoist only when the liveness decreases i.e., no more than + // one operand can be a use without kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return true; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. 
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + //if (isa(Op)) + // continue; + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + return false; + // It is always profitable to hoist when the liveness does not increase, + // a Kill will compensate for the def created by this instruction. + const Instruction *LU = lastUser(I, Op); + if (LU == I) + return true; + else { + // We optimistically assume that if all the users of Op are hoistable + // candidates then it is profitable to hoist. + bool stillProfitable = true; + for (const User *U : Op->users()) { + if (!hoistCandidate(U)) { + stillProfitable = false; + break; } + DEBUG(dbgs() << "\nstill hoistable:" << *U); } + if (stillProfitable) + return true; + } + } + return false; +} + +bool GVNHoist::profitableToSink(Instruction *I) const { + if (!CheckHoistProfitability) + return true; + // For -O3/-O2 sink only when the liveness decreases i.e., no more than + // one operand can be a kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return false; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. + unsigned NumKills = 0; + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + // It is always profitable to sink when the liveness does not increase. + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + continue; + // A Kill will increase the liveness during a sink. + const Instruction *LU = lastUser(I, Op); + if (LU == I && ++NumKills > 1) + return false; + } + return true; +} - // At this point it is not safe to extend the current hoisting to - // NewHoistPt: save the hoisting list so far. - if (std::distance(Start, II) > 1) - HPL.push_back({HoistBB, SmallVecInsn(Start, II)}); - - // Start over from BB. 
- Start = II; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(*Start); - HoistPt = Insn; - HoistBB = BB; - NumBBsOnAllPaths = MaxNumberOfBBSInPath; +void GVNHoist::partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // No need to sort for two instructions. + if (InstructionsToHoist.size() > 2) { + SortByDFSIn Pred(DFSNumber); + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); + } + + int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + + SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); + SmallVecImplInsn::iterator Start = II; + Instruction *HoistPt = *II; + BasicBlock *HoistPtBB = HoistPt->getParent(); + MemoryUseOrDef *MA; + if (K != InsKind::Scalar) + MA = MSSA->getMemoryAccess(HoistPt); + + for (++II; II != InstructionsToHoist.end(); ++II) { + Instruction *Insn = *II; + BasicBlock *InsnBB = Insn->getParent(); + BasicBlock *NewHoistBB; + Instruction *NewHoistPt; + + if (InsnBB == HoistPtBB) { // Both are in the same Basic Block. + NewHoistBB = HoistPtBB; + NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; + } else { + // If the hoisting point contains one of the instructions, + // then hoist there, otherwise hoist before the terminator. + NewHoistBB = DT->findNearestCommonDominator(HoistPtBB, InsnBB); + if (NewHoistBB == InsnBB) + NewHoistPt = Insn; + else if (NewHoistBB == HoistPtBB) + NewHoistPt = HoistPt; + else + NewHoistPt = NewHoistBB->getTerminator(); } - // Save the last partition. + SmallPtrSet WL; + WL.insert(HoistPtBB); + WL.insert(InsnBB); + + // When NewBB already contains an instruction to be hoisted, the + // expression is needed on all paths. + // Check that the hoisted expression is needed on all paths: it is + // unsafe to hoist loads to a place where there may be a path not + // loading from the same address: for instance there may be a branch on + // which the address of the load may not be initialized. 
+ if (K == InsKind::Scalar) { + if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths) && + profitableToHoist(Insn)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistPtBB = NewHoistBB; + continue; + } + } else { + if (safeToHoistLdSt_0(NewHoistPt, HoistPt, Insn, MA, K, NumBBsOnAllPaths, + HoistPtBB, NewHoistBB, InsnBB, WL) && + // Hoist loads when hoiting to pred BB even if liveness increases. + (profitableToHoist(Insn) || InsnBB->getSinglePredecessor() == NewHoistBB)) { + // Extend HoistPt to NewHoistPt. + HoistPt = NewHoistPt; + HoistPtBB = NewHoistBB; + continue; + } + } + + // At this point it is not safe to extend the current hoisting to + // NewHoistPt: save the hoisting list so far. if (std::distance(Start, II) > 1) - HPL.push_back({HoistBB, SmallVecInsn(Start, II)}); + HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)}); + + // Start over from BB. + Start = II; + if (K != InsKind::Scalar) + MA = MSSA->getMemoryAccess(*Start); + HoistPt = Insn; + HoistPtBB = InsnBB; + NumBBsOnAllPaths = MaxNumberOfBBSInPath; } - // Initialize HPL from Map. - void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL, - InsKind K) { - for (const auto &Entry : Map) { - if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) - return; - - const SmallVecInsn &V = Entry.second; - if (V.size() < 2) - continue; + // Save the last partition. + if (std::distance(Start, II) > 1) + HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)}); +} + +// Returns true when the instruction does not result +// in a hazard with any other instruction till the end of BB. +bool GVNHoist::safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K) { + BasicBlock::iterator II(I); + auto IMA = MSSA->getMemoryAccess(I); + assert (IMA); + if (MemoryUseOrDef *I0UD = cast(IMA)) { + // Only sink loads for now. 
+ if (!isa(I0UD)) + return false; + + //MemoryAccess *Def = I0UD->getDefiningAccess(); - // Compute the insertion point and the list of expressions to be hoisted. - SmallVecInsn InstructionsToHoist; - for (auto I : V) - if (!hasEH(I->getParent())) - InstructionsToHoist.push_back(I); + /*/ Updating Memory PHI is tricky, bail out for now. + for (User *U : Def->users()) + if (isa(U)) + return false;*/ + } - if (!InstructionsToHoist.empty()) - partitionCandidates(InstructionsToHoist, HPL, K); + while (++II != BB->end()) { // Skip I + auto IIMA = MSSA->getMemoryAccess(&*II); + // TODO: Bails out on any memory writes for now, improve for non-aliases. + if (IIMA) { + if (!isa(IIMA)) + return false; } } + return true; +} - // Return true when all operands of Instr are available at insertion point - // HoistPt. When limiting the number of hoisted expressions, one could hoist - // a load without hoisting its access function. So before hoisting any - // expression, make sure that all its operands are available at insert point. - bool allOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) - return false; +void GVNHoist::findHoistableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + for (const auto &Entry : Map) { + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) + return; - return true; + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Compute the insertion point and the list of expressions to be hoisted. + SmallVecInsn InstructionsToHoist; + for (auto I : V) { + // We don't need to check for hoist-barriers here because if + // I->getParent() has a barrier then I precedes the barrier. 
+ if (!hasEH(I->getParent())) + InstructionsToHoist.push_back(I); + } + + if (!InstructionsToHoist.empty()) + partitionCandidates(InstructionsToHoist, HPL, K); } +} - // Same as allOperandsAvailable with recursive check for GEP operands. - bool allGepOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) { - if (const GetElementPtrInst *GepOp = - dyn_cast(Inst)) { - if (!allGepOperandsAvailable(GepOp, HoistPt)) - return false; - // Gep is available if all operands of GepOp are available. - } else { - // Gep is not available if it has operands other than GEPs that are - // defined in blocks not dominating HoistPt. - return false; - } - } - return true; +/*void GVNHoist::getRanks(Function &F) { + // Prefer undef to anything else + if (isa(V)) + return 0; + // Global is also a Constant. + if (isa(V)) + return 1; + else if (auto *A = dyn_cast(V)) + return 2 + A->getArgNo(); + if (Rank.count(V)) + return Rank[V]; + if (Instruction *I = dyn_cast(V)) { + unsigned R = 0; + for (Value *V1 : I->operand_values()) { + unsigned R1 = getRanks(V1); + R = std::max(R, R1); + } + R = 3 + NumFuncArgs + IRank; + Rank[V] = R + 1; + return R; } + // Return a really large number. + return std::numeric_limits::max()/2; +} - // Make all operands of the GEP available. - void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist, - Instruction *Gep) const { - assert(allGepOperandsAvailable(Gep, HoistPt) && - "GEP operands not available"); +void GVNHoist::sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const { + +}*/ + +void GVNHoist::findSinkableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // Sort the VNs by rank, higher ranked should be sunk first. 
+ //SmallVector SortedRank; + //sortByRank(SortedRank, Map); + for (const auto &Entry : Map) { + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Only sink to common post-dom + SmallVecInsn InstructionsToSink; + for (Instruction *I : V) { + auto BB = I->getParent(); + // All the sinkable instructions are collected after any barrier in + // a basic block so no need to check for barrier BBs. + + // The instruction should have only one user i.e., PHI in the Succ + // TODO: Handle store, calls etc. without users + unsigned NumUsers = std::distance(I->user_begin(), I->user_end()); + if (NumUsers != 1) + continue; - Instruction *ClonedGep = Gep->clone(); - for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) - if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { + // The only user should be a PHI. + if(!isa(I->user_back())) + continue; + + // Sink to single successor. + if (auto Succ = BB->getSingleSuccessor()) { + if (I->user_back()->getParent() != Succ) + continue; // PHI should be in the immediate successor. + // The successor should have exactly two predecessors. + unsigned NumPreds = std::distance(pred_begin(Succ), pred_end(Succ)); + if (NumPreds != 2) + continue; - // Check whether the operand is already available. - if (DT->dominates(Op->getParent(), HoistPt)) + // Sinkable instruction found, check for safety and profitability. + // As the instructions are from a basic block without barriers, we can + // sink this instruction as long as there is no hazard. + if (K != InsKind::Scalar && !safeToSinkToEnd(I, BB, K)) + continue; + if (!profitableToSink(I)) continue; - // As a GEP can refer to other GEPs, recursively make all the operands - // of this GEP available at HoistPt. 
- if (GetElementPtrInst *GepOp = dyn_cast(Op)) - makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); + // We don't need to check for barriers here because the instruction + // will be sunk at the beginning of a basic block i.e., before any + // barrier, and I is after any barriers in I->getParent(). + if (!hasEH(Succ)) + InstructionsToSink.push_back(I); + } + } + + //assert(InstructionsToSink.size() < 3 && "Test case"); + // Sort V such that adjacent Instructions share common PDom. + if (InstructionsToSink.size() != 2) + continue; + //assert(InstructionsToSink.size() != 2 && "Test case"); + // There should be a unique PHI using I0 and I1 to be legally sinkable. + if (InstructionsToSink[0]->user_back() == InstructionsToSink[1]->user_back()) { + auto Succ0 = InstructionsToSink[0]->getParent()->getSingleSuccessor(); + auto Succ1 = InstructionsToSink[1]->getParent()->getSingleSuccessor(); + assert(Succ0 == Succ1); + HPL.push_back({Succ0, InstructionsToSink}); + } + } +} + +bool GVNHoist::allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) { + if (const GetElementPtrInst *GepOp = + dyn_cast(Inst)) { + if (!allGepOperandsAvailable(GepOp, HoistPt)) + return false; + // Gep is available if all operands of GepOp are available. + } else { + // Gep is not available if it has operands other than GEPs that are + // defined in blocks not dominating HoistPt. + return false; + } } + return true; +} - // Copy Gep and replace its uses in Repl with ClonedGep. 
- ClonedGep->insertBefore(HoistPt->getTerminator()); +void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const { + assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); - // Conservatively discard any optimization hints, they may differ on the - // other paths. - ClonedGep->dropUnknownNonDebugMetadata(); + Instruction *ClonedGep = Gep->clone(); + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { - // If we have optimization hints which agree with each other along different - // paths, preserve them. - for (const Instruction *OtherInst : InstructionsToHoist) { - const GetElementPtrInst *OtherGep; - if (auto *OtherLd = dyn_cast(OtherInst)) - OtherGep = cast(OtherLd->getPointerOperand()); - else - OtherGep = cast( - cast(OtherInst)->getPointerOperand()); - ClonedGep->andIRFlags(OtherGep); + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively make all the operands + // of this GEP available at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast(Op)) + makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); } - // Replace uses of Gep with ClonedGep in Repl. - Repl->replaceUsesOfWith(Gep, ClonedGep); + // Copy Gep and replace its uses in Repl with ClonedGep. + ClonedGep->insertBefore(HoistPt->getTerminator()); + + // Conservatively discard any optimization hints, they may differ on the + // other paths. + ClonedGep->dropUnknownNonDebugMetadata(); + + // If we have optimization hints which agree with each other along different + // paths, preserve them. 
+ for (const Instruction *OtherInst : InstructionsToHoist) { + const GetElementPtrInst *OtherGep; + if (auto *OtherLd = dyn_cast(OtherInst)) + OtherGep = cast(OtherLd->getPointerOperand()); + else + OtherGep = cast( + cast(OtherInst)->getPointerOperand()); + ClonedGep->andIRFlags(OtherGep); } - // In the case Repl is a load or a store, we make all their GEPs - // available: GEPs are not hoisted by default to avoid the address - // computations to be hoisted without the associated load or store. - bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const { - // Check whether the GEP of a ld/st can be synthesized at HoistPt. - GetElementPtrInst *Gep = nullptr; - Instruction *Val = nullptr; - if (auto *Ld = dyn_cast(Repl)) { - Gep = dyn_cast(Ld->getPointerOperand()); - } else if (auto *St = dyn_cast(Repl)) { - Gep = dyn_cast(St->getPointerOperand()); - Val = dyn_cast(St->getValueOperand()); - // Check that the stored value is available. - if (Val) { - if (isa(Val)) { - // Check whether we can compute the GEP at HoistPt. - if (!allGepOperandsAvailable(Val, HoistPt)) - return false; - } else if (!DT->dominates(Val->getParent(), HoistPt)) + // Replace uses of Gep with ClonedGep in Repl. + Repl->replaceUsesOfWith(Gep, ClonedGep); +} + +bool GVNHoist::makeGepOperandsAvailable( + Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. + GetElementPtrInst *Gep = nullptr; + Instruction *Val = nullptr; + if (auto *Ld = dyn_cast(Repl)) { + Gep = dyn_cast(Ld->getPointerOperand()); + } else if (auto *St = dyn_cast(Repl)) { + Gep = dyn_cast(St->getPointerOperand()); + Val = dyn_cast(St->getValueOperand()); + // Check that the stored value is available. + if (Val) { + if (isa(Val)) { + // Check whether we can compute the GEP at HoistPt. 
+ if (!allGepOperandsAvailable(Val, HoistPt)) return false; - } + } else if (!DT->dominates(Val->getParent(), HoistPt)) + return false; } + } - // Check whether we can compute the Gep at HoistPt. - if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) - return false; + // Check whether we can compute the Gep at HoistPt. + if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) + return false; - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); - if (Val && isa(Val)) - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); + if (Val && isa(Val)) + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); - return true; + return true; +} + +void GVNHoist::setAlignment(Instruction *I, Instruction *Repl) { + if (auto *ReplacementLoad = dyn_cast(Repl)) { + ReplacementLoad->setAlignment( + std::min(ReplacementLoad->getAlignment(), + cast(I)->getAlignment())); + ++NumLoadsRemoved; + } else if (auto *ReplacementStore = dyn_cast(Repl)) { + ReplacementStore->setAlignment( + std::min(ReplacementStore->getAlignment(), + cast(I)->getAlignment())); + ++NumStoresRemoved; + } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { + ReplacementAlloca->setAlignment( + std::max(ReplacementAlloca->getAlignment(), + cast(I)->getAlignment())); + } else if (isa(Repl)) { + ++NumCallsRemoved; } +} - std::pair hoist(HoistingPointList &HPL) { - unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; - for (const HoistingPointInfo &HP : HPL) { - // Find out whether we already have one of the instructions in HoistPt, - // in which case we do not have to move it. 
- BasicBlock *HoistPt = HP.first; - const SmallVecInsn &InstructionsToHoist = HP.second; - Instruction *Repl = nullptr; - for (Instruction *I : InstructionsToHoist) - if (I->getParent() == HoistPt) - // If there are two instructions in HoistPt to be hoisted in place: - // update Repl to be the first one, such that we can rename the uses - // of the second based on the first. - if (!Repl || firstInBB(I, Repl)) - Repl = I; - - // Keep track of whether we moved the instruction so we know whether we - // should move the MemoryAccess. - bool MoveAccess = true; - if (Repl) { - // Repl is already in HoistPt: it remains in place. - assert(allOperandsAvailable(Repl, HoistPt) && - "instruction depends on operands that are not available"); - MoveAccess = false; - } else { - // When we do not find Repl in HoistPt, select the first in the list - // and move it to HoistPt. - Repl = InstructionsToHoist.front(); - - // We can move Repl in HoistPt only when all operands are available. - // The order in which hoistings are done may influence the availability - // of operands. - if (!allOperandsAvailable(Repl, HoistPt)) { - - // When HoistingGeps there is nothing more we can do to make the - // operands available: just continue. - if (HoistingGeps) - continue; +void GVNHoist::removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc) { + setAlignment(I, Repl); + if (NewMemAcc) { + // Update the uses of the old MSSA access with NewMemAcc. + MemoryAccess *OldMA = MSSA->getMemoryAccess(I); + OldMA->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMA); + } - // When not HoistingGeps we need to copy the GEPs. - if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) - continue; - } + Repl->andIRFlags(I); + combineKnownMetadata(Repl, I); + I->replaceAllUsesWith(Repl); + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I); + I->eraseFromParent(); +} - // Move the instruction at the end of HoistPt. 
- Instruction *Last = HoistPt->getTerminator(); - MD->removeInstruction(Repl); - Repl->moveBefore(Last); +void GVNHoist::removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl) { + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + // Remove and rename all other instructions. + for (Instruction *I : InstructionsToHoist) + if (I != Repl) { + removeAndReplace(I, Repl, NewMemAcc); + } +} + +void GVNHoist::removeMPhi(MemoryAccess *NewMemAcc) { + // Remove MemorySSA phi nodes with the same arguments. + SmallPtrSet UsePhis; + for (User *U : NewMemAcc->users()) + if (MemoryPhi *Phi = dyn_cast(U)) + UsePhis.insert(Phi); + + for (auto *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); + } + } +} - DFSNumber[Repl] = DFSNumber[Last]++; +std::pair +GVNHoist::hoist(GVNHoist::HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *HoistPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == HoistPt) + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + if (!Repl || firstInBB(I, Repl)) + Repl = I; + + // Keep track of whether we moved the instruction so we know whether we + // should move the MemoryAccess. + bool MoveAccess = true; + if (Repl) { + // Repl is already in HoistPt: it remains in place. 
+ assert(allOperandsAvailable(Repl, HoistPt) && + "instruction depends on operands that are not available"); + MoveAccess = false; + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. + if (!allOperandsAvailable(Repl, HoistPt)) { + + // When HoistingGeps there is nothing more we can do to make the + // operands available: just continue. + if (HoistingGeps) + continue; + + // When not HoistingGeps we need to copy the GEPs. + if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) + continue; } - MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); - - if (MoveAccess) { - if (MemoryUseOrDef *OldMemAcc = - dyn_cast_or_null(NewMemAcc)) { - // The definition of this ld/st will not change: ld/st hoisting is - // legal when the ld/st is not moved past its current definition. - MemoryAccess *Def = OldMemAcc->getDefiningAccess(); - NewMemAcc = - MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End); - OldMemAcc->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMemAcc); - } + // Move the instruction at the end of HoistPt. + Instruction *Last = HoistPt->getTerminator(); + MD->removeInstruction(Repl); + Repl->moveBefore(Last); + + DFSNumber[Repl] = DFSNumber[Last]++; + } + + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + + if (MoveAccess) { + if (MemoryUseOrDef *OldMemAcc = + dyn_cast_or_null(NewMemAcc)) { + // The definition of this ld/st will not change: ld/st hoisting is + // legal when the ld/st is not moved past its current definition. 
+ MemoryAccess *Def = OldMemAcc->getDefiningAccess(); + NewMemAcc = MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, + MemorySSA::End); + OldMemAcc->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMemAcc); } + } - if (isa(Repl)) - ++NL; - else if (isa(Repl)) - ++NS; - else if (isa(Repl)) - ++NC; - else // Scalar - ++NI; - - // Remove and rename all other instructions. - for (Instruction *I : InstructionsToHoist) - if (I != Repl) { - ++NR; - if (auto *ReplacementLoad = dyn_cast(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlignment(), - cast(I)->getAlignment())); - ++NumLoadsRemoved; - } else if (auto *ReplacementStore = dyn_cast(Repl)) { - ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlignment(), - cast(I)->getAlignment())); - ++NumStoresRemoved; - } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { - ReplacementAlloca->setAlignment( - std::max(ReplacementAlloca->getAlignment(), - cast(I)->getAlignment())); - } else if (isa(Repl)) { - ++NumCallsRemoved; - } + updateLocalStats(Repl, NI, NL, NS, NC); + removeAndReplace(InstructionsToHoist, Repl); + // All except one i.e., Repl are removed. + NR += InstructionsToHoist.size() -1; - if (NewMemAcc) { - // Update the uses of the old MSSA access with NewMemAcc. - MemoryAccess *OldMA = MSSA->getMemoryAccess(I); - OldMA->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMA); - } + if (NewMemAcc) + removeMPhi(NewMemAcc); + } - Repl->andIRFlags(I); - combineKnownMetadata(Repl, I); - I->replaceAllUsesWith(Repl); - // Also invalidate the Alias Analysis cache. - MD->removeInstruction(I); - I->eraseFromParent(); - } + updateGlobalStats(NI, NL, NS, NC, NR); + return {NI, NL + NC + NS}; +} - // Remove MemorySSA phi nodes with the same arguments. 
- if (NewMemAcc) { - SmallPtrSet UsePhis; - for (User *U : NewMemAcc->users()) - if (MemoryPhi *Phi = dyn_cast(U)) - UsePhis.insert(Phi); - - for (auto *Phi : UsePhis) { - auto In = Phi->incoming_values(); - if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { - Phi->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(Phi); - } - } +// Update MemorySSA if mem-refs are sunk. +std::pair +GVNHoist::sink(GVNHoist::HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0; + for (const HoistingPointInfo &HP : HPL) { + BasicBlock *SinkPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + assert(InstructionsToHoist.size() == 2); + Instruction* I0 = InstructionsToHoist[0]; + Instruction* I1 = InstructionsToHoist[1]; + + // We need to check availability for I0 and I1. TODO: Additionally, we can + // still sink if the operand not available is also getting sunk will + // available operands. + if (!allOperandsAvailable(I0, SinkPt) || !allOperandsAvailable(I1, SinkPt)) + continue; + // Keep I0 and remove I1 and PN + PHINode *PN = cast(I0->user_back()); + MemoryAccess *I0MemAccess = MSSA->getMemoryAccess(I0); + DEBUG(dbgs() << "\nSinking:" << *I0 << *I1 << *PN); + + setAlignment(I1, I0); + I0->andIRFlags(I1); + combineKnownMetadata(I0, I1); + // I1->replaceAllUsesWith(I0); Not required because I1 used once in PHI. + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I1); + MD->removeInstruction(PN); + PN->replaceAllUsesWith(I0); + // FIXME: Update Memory SSA + // Update the uses of the old MSSA access with I0. + // Remove MSSA for I1 + // Remove MSSA for PN, and replace with I0 + // Combine metadata of I0 and I1 + // Memory PHI? 
+ //assert(0); + bool found = false; + auto It = SinkPt->begin(); + while (It != SinkPt->end()) { // Find the last PHI to insert I0 + if (!isa(It)) + break; + if (PN == &*It) + found = true; + ++It; + } + assert(found && "PHI replaced in wrong BB"); + I0->moveBefore(*SinkPt, It); + + if (I0MemAccess) { // I0, I1 are mem-refs + assert(MSSA->getMemoryAccess(I1)); + // This MSSA update is only for mem-refs with one use in a PHI in Succ BB. + MemoryAccess *I1MemAccess = MSSA->getMemoryAccess(I1); + MemoryUseOrDef *I0UD = cast(I0MemAccess); + MemoryUseOrDef *I1UD = cast(I1MemAccess); + + MemoryAccess *I0Def = I0UD->getDefiningAccess(); + MemoryAccess *I1Def = I1UD->getDefiningAccess(); + + MemoryPhi *NewPHI = MSSA->getMemoryAccess(SinkPt); + if (!NewPHI) { + NewPHI = MSSA->createMemoryPhi(SinkPt); + NewPHI->addIncoming(I0Def, I0->getParent()); + NewPHI->addIncoming(I1Def, I1->getParent()); } + + I0UD->setDefiningAccess(NewPHI); + MSSAUpdater->removeMemoryAccess(I0MemAccess); + MSSAUpdater->removeMemoryAccess(I1MemAccess); } - NumHoisted += NL + NS + NC + NI; - NumRemoved += NR; - NumLoadsHoisted += NL; - NumStoresHoisted += NS; - NumCallsHoisted += NC; - return {NI, NL + NC + NS}; + PN->eraseFromParent(); + I1->eraseFromParent(); + + NumSunk++; } + return {NI, NL + NC + NS}; +} - // Hoist all expressions. Returns Number of scalars hoisted - // and number of non-scalars hoisted. - std::pair hoistExpressions(Function &F) { - InsnInfo II; - LoadInfo LI; - StoreInfo SI; - CallInfo CI; - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - int InstructionNb = 0; - for (Instruction &I1 : *BB) { - // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting - // deeper may increase the register pressure and compilation time. - if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) - break; +bool GVNHoist::getVN(Instruction *I) { + // Do not value number terminator instructions. + if (isa(I)) + return false; - // Do not value number terminator instructions. 
- if (isa(&I1)) - break; + if (auto *Load = dyn_cast(I)) + LI.insert(Load, VN); + else if (auto *Store = dyn_cast(I)) + SI.insert(Store, VN); + else if (auto *Call = dyn_cast(I)) { + if (auto *Intr = dyn_cast(Call)) { + if (isa(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume) + return true; + } + if (Call->mayHaveSideEffects()) + return false; - if (auto *Load = dyn_cast(&I1)) - LI.insert(Load, VN); - else if (auto *Store = dyn_cast(&I1)) - SI.insert(Store, VN); - else if (auto *Call = dyn_cast(&I1)) { - if (auto *Intr = dyn_cast(Call)) { - if (isa(Intr) || - Intr->getIntrinsicID() == Intrinsic::assume) - continue; - } - if (Call->mayHaveSideEffects()) - break; + if (Call->isConvergent()) + return false; + + CI.insert(Call, VN); + } else if (HoistingGeps || !isa(I)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. + II.insert(I, VN); + + return true; +} + +void GVNHoist::barriersAndVNs(Function &F, bool Sinking) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + int InstructionNb = 0; + bool BarrierFound = false; + SmallVector, 4> CurrentBBVNs; + for (Instruction &I1 : *BB) { + // If I1 cannot guarantee progress, subsequent instructions + // in BB cannot be hoisted anyways. + // TODO: For sink operation anything after hoist barrier is okay to sink + // but nothing before the hoist barrier. + if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { + //NumBarriers++; + HoistBarrier.insert(BB); + BarrierFound = true; + break; + } + + // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting + // deeper may increase the register pressure and compilation time. 
+ if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) + break; - if (Call->isConvergent()) + if (!getVN(&I1)) + break; + } + if (Sinking && BarrierFound) { + // TODO: clear VNs for this basicblock. + // Instructions after the last barriers can be sunk. + for (BasicBlock::reverse_iterator It = BB->rbegin(), Ie = BB->rend(); + It != Ie; ++It) { + if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) + break; + if (!getVN(&*It)) break; + //assert(0); + } + } + } +} + +std::pair GVNHoist::hoistExpressions(Function &F) { + barriersAndVNs(F, false); + HoistingPointList HPL; + findHoistableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(LI.getVNTable(), HPL, InsKind::Load); + findHoistableInsn(SI.getVNTable(), HPL, InsKind::Store); + findHoistableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findHoistableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); +} - CI.insert(Call, VN); - } else if (HoistingGeps || !isa(&I1)) - // Do not hoist scalars past calls that may write to memory because - // that could result in spills later. geps are handled separately. - // TODO: We can relax this for targets like AArch64 as they have more - // registers than X86. - II.insert(&I1, VN); +std::pair GVNHoist::sinkExpressions(Function &F) { + barriersAndVNs(F, false); // First clear VNs before barrier in that BB. see TODO then set it to true. 
+ HoistingPointList HPL; + findSinkableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(LI.getVNTable(), HPL, InsKind::Load); + findSinkableInsn(SI.getVNTable(), HPL, InsKind::Store); + findSinkableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findSinkableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return sink(HPL); +} + +void GVNHoist::constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) { + // Since getPathLength is inclusive of both the terminal nodes + // i.e., Entry and *DFI so decrease by 1. + unsigned Depth = DFI.getPathLength() - 1; + BasicBlock *BB = (*DFI)->getBlock(); + DomLevels[BB] = Depth; + BBLevels[Depth].push_back(BB); + for (BasicBlock *Succ : BB->getTerminator()->successors()) + if (!DT->properlyDominates(BB, Succ)) { + JEdges.insert(std::make_pair(BB, Succ)); + } + } +} + +bool GVNHoist::constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + bool Repeat = false; + DenseSet VisJEdges; // Visited J Edges. + unsigned PrevLev = 0; + for (std::pair &P : BBLevels) { + assert(PrevLev <= P.first); + PrevLev = P.first; + for (BasicBlock *CurrBB : P.second) { + for (auto PB = pred_begin(CurrBB), PE = pred_end(CurrBB); PB != PE; + ++PB) { + EdgeT Edge(*PB, CurrBB); // For all incoming edges to CurrBB. + if (JEdges.count(Edge) && !VisJEdges.count(Edge)) { + VisJEdges.insert(Edge); // Visit + BasicBlock *Src = Edge.first; + BasicBlock *Dst = Edge.second; + BasicBlock *INode = nullptr; + MergeSet[Dst].insert(Dst); // The target of JEdge. + while (DomLevels[Src] >= DomLevels[Dst]) { // A backedge. + DEBUG(dbgs() << "\nVisiting: " << Src->getName() << " -> " + << Dst->getName()); + // Merge (tmp) = Merge (tmp) U Merge (tnode) U { tnode } + // MergeSet(tnode) contains tnode. 
+ MergeSet[Src].insert(MergeSet[Dst].begin(), MergeSet[Dst].end()); + INode = Src; + DEBUG(dbgs() << "IDom of " << Src->getName() << " is "); + Src = DT->getIDom(Src); + DEBUG(dbgs() << Src->getName()); + } + for (auto PINode = pred_begin(INode), PENode = pred_end(INode); + PINode != PENode; ++PINode) { // INode is an ancestor of SNode. + EdgeT Edge(*PINode, INode); + if (VisJEdges.count(Edge)) { + assert(JEdges.count(Edge)); + BasicBlock *SNode = *PINode; + // Check inconsistency, MergeSet[Dest] subset of MergeSet[Src] + if (!isSubset(MergeSet[INode], MergeSet[SNode])) + Repeat = true; + } + } + } } } + } + return Repeat; +} - HoistingPointList HPL; - computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); - computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); - computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); - computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); - return hoist(HPL); +const Instruction *GVNHoist::lastUser(const Instruction *I, + const Value *Val) const { + // TODO: Make isLiveOutUsingMergeSet take const parameters. + assert(!isLiveOutUsingMergeSet(const_cast(I->getParent()), + const_cast(Val))); + const BasicBlock *BB = I->getParent(); + BasicBlock::const_iterator BI(I), BE = BB->end(); + unsigned ICount = std::distance(BI, BE); + if (Val->getNumUses() <= ICount) { // Iterate on uses + for (const User *U : Val->users()) { + const Instruction *UserI = cast(U); + if (UserI != I && UserI->getParent() == BB) { + if (firstInBB(I, UserI)) // I precedes another Use => not a kill. 
+ return UserI; + } + } + return I; } -}; + // else Iterate on Instructions + for (++BI; BI != BE; ++BI) { + for (unsigned i = 0; i < BI->getNumOperands(); ++i) + if (BI->getOperandUse(i).get() == Val) + return &*BI; + } + return I; +} -class GVNHoistLegacyPass : public FunctionPass { -public: - static char ID; +bool GVNHoist::isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const { + assert(BB); + const BasicBlock *ValDefBB = nullptr; // BasicBlock defining Val + if (Instruction *I = dyn_cast(Val)) + ValDefBB = I->getParent(); + // FIXME! + // We are assuming when DefBB is not defined then the value is a parameter. + + // Case when Val is defined in BB, if any of the use is outside BB (DefBB) + // then it must be live-out. + if (ValDefBB == BB) + for (User *U : Val->users()) { + if (cast(U)->getParent() != BB) + return true; + } - GVNHoistLegacyPass() : FunctionPass(ID) { - initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + // Mr(n) = M(n) U {n}; Create a new set from the merge set + // Ms(n) = Ms(n) U Mr(w); For each successor w of BB + SmallSetBB Ms; // Ms = null-set + for (BasicBlock *Succ : BB->getTerminator()->successors()) { + Ms.insert(Succ); // Mr(Succ) = Succ U M(Succ) + for (BasicBlock *BB : MergeSet.lookup(Succ)) + Ms.insert(BB); // M(Succ) } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - auto &DT = getAnalysis().getDomTree(); - auto &AA = getAnalysis().getAAResults(); - auto &MD = getAnalysis().getMemDep(); - auto &MSSA = getAnalysis().getMSSA(); + // Iterate over all the uses of Val, if any of its users is in the mergeset + // of \p BB then Val is LiveOut from BB. + for (User *U : Val->users()) { + BasicBlock *UserDefBB = nullptr; + if (Instruction *I = dyn_cast(U)) + UserDefBB = I->getParent(); + else // Assuming live-out conservatively, the user can be a global + // TODO: maybe return false is okay?? 
+ return true; //llvm_unreachable("User is not an instruction."); + while (UserDefBB && (UserDefBB != ValDefBB)) { + if (Ms.count(UserDefBB)) // if t /\ Ms(n) then return true; + return true; + UserDefBB = DT->getIDom(UserDefBB); + } + } + return false; +} - GVNHoist G(&DT, &AA, &MD, &MSSA); - return G.run(F); +bool GVNHoist::run(Function &F) { + if (F.optForMinSize()) { + CheckHoistProfitability = false; + CheckSinkProfitability = false; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); + //NumFuncArgs = F.arg_size(); + DT->updateDFSNumbers(); + DomLevelsT DomLevels; + DenseSet JEdges; + BBLevelT BBLevels; + constructDJGraph(DomLevels, JEdges, BBLevels); + // printBBLevels(BBLevels); + DEBUG(printJEdges(JEdges)); + while (constructMergeSet(DomLevels, JEdges, BBLevels)) + ; + DEBUG(printMergeSet(MergeSet)); + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + // Perform DFS Numbering of instructions. + unsigned BBI = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { + DFSNumber[BB] = ++BBI; + unsigned I = 0; + for (auto &Inst : *BB) + DFSNumber[&Inst] = ++I; } -}; -} // namespace + + int ChainLength = 0; + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (1) { + if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) + return Res; + clearVNTables(); + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) + break; + + if (HoistStat.second > 0) + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads or stores in order to be able to + // hoist all scalars dependent on the hoisted ld/st. 
+ VN.clear(); + + Res = true; + } + + clearVNTables(); + sinkExpressions(F); + return Res; +} + +} // namespace llvm PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); Index: llvm/test/Transforms/GVNHoist/broken-mod-sink.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/broken-mod-sink.ll @@ -0,0 +1,94 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Check that all and, inttoptr, and bitcast is sunk. +; CHECK: and +; CHECK-NOT: and +; CHECK: inttoptr +; CHECK-NOT: inttoptr +; CHECK: bitcast +; CHECK-NOT: bitcast + + +; ModuleID = 'broken-mod.ll' + +%struct.pluto = type { i32 (...)**, i8, i8, i16, i32 } + +define void @baz0() { +bb: + br i1 undef, label %bb1, label %bb4 + +bb1: ; preds = %bb + %tmp = and i64 undef, -8 + %tmp2 = inttoptr i64 %tmp to i8* + %tmp3 = bitcast i8* %tmp2 to %struct.pluto* + br label %bb9 + +bb4: ; preds = %bb + br i1 undef, label %bb5, label %bb11 + +bb5: ; preds = %bb4 + %tmp6 = and i64 undef, -8 + %tmp7 = inttoptr i64 %tmp6 to i8* + %tmp8 = bitcast i8* %tmp7 to %struct.pluto* + br label %bb9 + +bb9: ; preds = %bb5, %bb1 + %tmp10 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp3, %bb1 ] + unreachable + +bb11: ; preds = %bb4 + ret void +} + +; Function Attrs: nounwind uwtable +define void @baz() { +bb: + %tmp = and i64 undef, -8 + %tmp2 = inttoptr i64 %tmp to i8* + br i1 undef, label %bb1, label %bb4 + +bb1: ; preds = %bb + %tmp3 = bitcast i8* %tmp2 to %struct.pluto* + br label %bb9 + +bb4: ; preds = %bb + br i1 undef, label %bb5, label %bb11 + +bb5: ; preds = %bb4 + %tmp8 = bitcast i8* %tmp2 to %struct.pluto* + br label %bb9 + +bb9: ; preds = %bb5, %bb1 + %tmp10 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp3, %bb1 ] + unreachable + +bb11: ; preds = %bb4 + ret void +} + +define void @baz1() { +bb: + %tmp = and i64 undef, -8 + br i1 undef, label %bb1, label %bb4 + +bb1: ; preds = %bb + %tmp2 = inttoptr i64 
%tmp to i8* + %tmp10 = bitcast i8* %tmp2 to %struct.pluto* + br label %bb9 + +bb4: ; preds = %bb + br i1 undef, label %bb5, label %bb11 + +bb5: ; preds = %bb4 + %tmp3 = inttoptr i64 %tmp to i8* + %tmp8 = bitcast i8* %tmp3 to %struct.pluto* + br label %bb9 + +bb9: ; preds = %bb5, %bb1 + %tmp11 = phi %struct.pluto* [ %tmp8, %bb5 ], [ %tmp10, %bb1 ] + unreachable + +bb11: ; preds = %bb4 + ret void +} + Index: llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; Testcase taken from: +; Das, Dibyendu, B. Dupont De Dinechin, and Ramakrishna Upadrasta. "Efficient +; liveness computation using merge sets and dj-graphs." ACM Transactions on +; Architecture and Code Optimization (TACO) 8.4 (2012): 27. + +; check the J edges +; CHECK: Found a JEdge: bb4 -> bb5 +; CHECK: Found a JEdge: bb5 -> bb6 +; CHECK: Found a JEdge: bb6 -> bb5 +; CHECK: Found a JEdge: bb7 -> bb2 +; CHECK: Found a JEdge: bb9 -> bb6 +; CHECK: Found a JEdge: bb10 -> bb8 + +; check the mergesets +; CHECK: MergeSet of: bb2: bb2, +; CHECK: MergeSet of: bb3: bb2, +; CHECK: MergeSet of: bb4: bb2, bb5, bb6, +; CHECK: MergeSet of: bb8: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb5: bb2, bb5, bb6, +; CHECK: MergeSet of: bb6: bb2, bb5, bb6, +; CHECK: MergeSet of: bb7: bb2, +; CHECK: MergeSet of: bb9: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb10: bb2, bb8, bb5, bb6, + + +define void @irreducible_loop() { +bb1: +br label %bb2 + +bb2: + br i1 undef, label %bb3, label %bb11 + +bb3: + br i1 undef, label %bb4, label %bb8 + +bb4: + br label %bb5 + +bb5: + br label %bb6 + +bb6: + br i1 undef, label %bb7, label %bb5 + +bb7: + br label %bb2 + +bb8: + br label %bb9 + +bb9: + br i1 undef, label %bb6, label %bb10 + +bb10: + br label %bb8 + +bb11: + ret void +} \ No newline at end of 
file Index: llvm/test/Transforms/GVNHoist/dj-edge-detect.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; check the J edges +; CHECK: Found a JEdge: bb2 -> bb3 +; CHECK: Found a JEdge: bb18 -> bb6 +; CHECK: Found a JEdge: bb15 -> bb18 +; CHECK: Found a JEdge: bb8 -> bb18 +; CHECK: Found a JEdge: bb10 -> bb18 +; CHECK: Found a JEdge: bb13 -> bb18 + + +; check the mergesets +; CHECK: MergeSet of: bb3: bb3, +; CHECK: MergeSet of: bb2: bb3, +; CHECK: MergeSet of: bb6: bb3, bb6, +; CHECK: MergeSet of: bb18: bb3, bb6, bb18, +; CHECK: MergeSet of: bb15: bb3, bb6, bb18, +; CHECK: MergeSet of: bb8: bb3, bb6, bb18, +; CHECK: MergeSet of: bb10: bb3, bb6, bb18, +; CHECK: MergeSet of: bb13: bb3, bb6, bb18, + +%0 = type { %1*, %8* } +%1 = type { %2*, %3* } +%2 = type { i32, i32, i32, i32, i32*, i8*, i32 } +%3 = type { %4, i32, i32, %7*, i8, i8, i8, i8, i8 } +%4 = type { i8, i32, i8*, %5 } +%5 = type { %6 } +%6 = type { i16 } +%7 = type { i8*, i8*, i8*, i8*, i8* } +%8 = type <{ i8*, i8*, i32, %9, %17*, %19* }> +%9 = type { %10*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %15, i32, i32, i8*, %16*, i32, i8, i8*, i8, i8* } +%10 = type { i8*, %11*, i8*, %12, %13*, i8*, i32 (i8*)*, i32, i8, %14 } +%11 = type { i8*, i32 (i8*, %12*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %16* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* } +%12 = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* } +%13 = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* } +%14 = type { i32, i32, i8, i8, i8 } +%15 = type { i32, i32, i32 } +%16 = type { i16, i16, i32, i32 } +%17 = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%16*, i32)*, %18, i32, i32 } +%18 = type 
{ i8*, i8* (i8*, i8*, i32)* } +%19 = type { %20, i32, i32 } +%20 = type { i32, i32, [8 x i8], i8, i8, i16 } + +@global = external hidden unnamed_addr constant [10 x i8], align 1 + +define void @ham(i8* nocapture readnone %arg) local_unnamed_addr { +bb: + %tmp = alloca %0*, align 8 + %tmp1 = bitcast %0** %tmp to i8* + br label %bb3 + +bb2: ; preds = %bb6 + br label %bb3 + +bb3: ; preds = %bb2, %bb + %tmp4 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp5 = call i32 @quux(i8* %tmp1) + br label %bb6 + +bb6: ; preds = %bb18, %bb3 + %tmp7 = phi i32 [ %tmp5, %bb3 ], [ %tmp19, %bb18 ] + switch i32 %tmp7, label %bb15 [ + i32 0, label %bb2 + i32 35, label %bb8 + i32 11, label %bb10 + i32 12, label %bb13 + ] + +bb8: ; preds = %bb6 + %tmp9 = load %0*, %0** %tmp, align 8, !tbaa !0 + br label %bb18 + +bb10: ; preds = %bb6 + %tmp11 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp12 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb13: ; preds = %bb6 + %tmp14 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb15: ; preds = %bb6 + %tmp16 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp17 = call i32 @quux(i8* null) + br label %bb18 + +bb18: ; preds = %bb15, %bb13, %bb10, %bb8 + %tmp19 = phi i32 [ %tmp17, %bb15 ], [ %tmp14, %bb13 ], [ %tmp12, %bb10 ], [ 0, %bb8 ] + br label %bb6 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +declare i32 @zot(%0**) + +declare i32 @quux(i8*) + +declare i32 @wombat(i8* nocapture readonly, ...) 
+ +attributes #0 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"any pointer", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} Index: llvm/test/Transforms/GVNHoist/gvn-sink-test-1.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/gvn-sink-test-1.ll @@ -0,0 +1,49 @@ +; RUN: opt -S -gvn-hoist -debug < %s +; ModuleID = 'gvn-sink-test.ll' +source_filename = "bugpoint-output-9bbed91.bc" +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.spam = type opaque + +declare void @bar() local_unnamed_addr #0 + +; Function Attrs: noinline nounwind uwtable +define fastcc void @wombat() unnamed_addr #1 { +bb: + br i1 undef, label %bb1, label %bb10 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + unreachable + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb5 + +bb4: ; preds = %bb3 + %tmp = load %struct.spam*, %struct.spam** undef, align 8 + br label %bb7 + +bb5: ; preds = %bb3 + %tmp6 = load %struct.spam*, %struct.spam** undef, align 8 + br label %bb7 + +bb7: ; preds = %bb5, %bb4 + %tmp8 = phi %struct.spam* [ %tmp, %bb4 ], [ %tmp6, %bb5 ] + call void @bar() #2 + %tmp9 = load %struct.spam*, %struct.spam** undef, align 8 + unreachable + +bb10: ; preds = %bb + ret void +} + +attributes #0 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" 
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} Index: llvm/test/Transforms/GVNHoist/hoist-mssa.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-mssa.ll +++ llvm/test/Transforms/GVNHoist/hoist-mssa.ll @@ -1,7 +1,9 @@ ; RUN: opt -S -gvn-hoist < %s | FileCheck %s -; Check that store hoisting works: there should be only one store left. -; CHECK-LABEL: @getopt +; Check that store hoisting works: there should be only one store hoisted +; because load cannot be hoisted as it is not profitable. +; CHECK-LABEL: @getopt( +; CHECK: store i32 ; CHECK: store i32 ; CHECK-NOT: store i32 @@ -47,6 +49,51 @@ ret void } +; Check that store hoisting works: there should be only one store left because +; load should also be hoisted in this case. 
+; CHECK-LABEL: @getopt_hoist_load( +; CHECK: store i32 +; CHECK-NOT: store i32 +define void @getopt_hoist_load(i32* %p1) { +bb: + br label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb13 + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb9 + +bb4: ; preds = %bb3 + %tmp = load i32, i32* @optind, align 4 + br i1 undef, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %tmp6 = add nsw i32 %tmp, 1 + store i32 %tmp6, i32* %p1, align 4 + br label %bb12 + +bb7: ; preds = %bb4 + %tmp8 = add nsw i32 %tmp, 1 + store i32 %tmp8, i32* %p1, align 4 + br label %bb13 + +bb9: ; preds = %bb3 + %tmp10 = load i32, i32* @optind, align 4 + %tmp11 = add nsw i32 %tmp10, 1 + store i32 %tmp11, i32* %p1, align 4 + br label %bb12 + +bb12: ; preds = %bb9, %bb5 + br label %bb13 + +bb13: ; preds = %bb12, %bb7, %bb2 + ret void +} + @GlobalVar = internal global float 1.000000e+00 ; Check that we hoist stores and remove the MSSA phi node. Index: llvm/test/Transforms/GVNHoist/hoist-very-busy.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-very-busy.ll +++ llvm/test/Transforms/GVNHoist/hoist-very-busy.ll @@ -32,3 +32,24 @@ declare void @longjmp(%struct.__jmp_buf_tag*, i32) #0 attributes #0 = { noreturn nounwind } + +; Check that the call and fcmp are hoisted. 
+; CHECK-LABEL: define void @fun( +; CHECK: store +; CHECK-NOT: store + +define void @fun() { +entry: + br label %if.then + +if.then: ; preds = %entry + br i1 undef, label %sw0, label %sw1 + +sw0: + store i32 1, i32* @G + unreachable + +sw1: + store i32 1, i32* @G + ret void +} Index: llvm/test/Transforms/GVNHoist/sink-load-dom-numbering.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/sink-load-dom-numbering.ll @@ -0,0 +1,58 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; CHECK: load +; CHECK-NOT: load + +%struct.blam = type { %struct.wibble } +%struct.wibble = type { i8*, i32 } + +declare void @hoge() + +define void @eggs() { +bb: + br label %bb1 + +bb1: ; preds = %bb + br label %bb2 + +bb2: ; preds = %bb1 + br i1 undef, label %bb3, label %bb4 + +bb3: ; preds = %bb2 + unreachable + +bb4: ; preds = %bb2 + br label %bb5 + +bb5: ; preds = %bb4 + br i1 undef, label %bb6, label %bb9 + +bb6: ; preds = %bb5 + br i1 undef, label %bb8, label %bb7 + +bb7: ; preds = %bb6 + call void @hoge() + br label %bb8 + +bb8: ; preds = %bb7, %bb6 + %tmp = load %struct.blam*, %struct.blam** undef, align 8, !tbaa !1 + br label %bb11 + +bb9: ; preds = %bb5 + %tmp10 = load %struct.blam*, %struct.blam** undef, align 8, !tbaa !1 + br label %bb11 + +bb11: ; preds = %bb9, %bb8 + %tmp12 = phi %struct.blam* [ %tmp, %bb8 ], [ %tmp10, %bb9 ] + unreachable +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !3, i64 8} +!2 = !{!"tree_common", !3, i64 0, !3, i64 8, !6, i64 16, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 17, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 18, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19, !6, i64 19} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!"int", !4, i64 0} Index: 
llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-1.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-1.ll @@ -0,0 +1,57 @@ +; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; Check that load is sunk to its successor. +; CHECK: load +; CHECK-NOT: load + +@reg_last_set_invalid = external global i8*, align 8 + +; Function Attrs: nounwind uwtable +define void @record_value_for_reg() { +entry: + br label %for.cond + +for.cond: ; preds = %entry + br label %for.cond139 + +for.cond139: ; preds = %if.end156, %for.cond + br i1 undef, label %for.body142, label %for.end159 + +for.body142: ; preds = %for.cond139 + store i32 undef, i32* undef, align 4, !tbaa !1 + br i1 undef, label %land.lhs.true146, label %if.else + +land.lhs.true146: ; preds = %for.body142 + br i1 undef, label %if.then151, label %if.else + +if.then151: ; preds = %land.lhs.true146 + %0 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !5 + br label %if.end156 + +if.else: ; preds = %land.lhs.true146, %for.body142 + %1 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !5 + br label %if.end156 + +if.end156: ; preds = %if.else, %if.then151 + %.sink5 = phi i8* [ %1, %if.else ], [ %0, %if.then151 ] + br label %for.cond139 + +for.end159: ; preds = %for.cond139 + br i1 undef, label %land.lhs.true161, label %if.end174 + +land.lhs.true161: ; preds = %for.end159 + br label %if.end174 + +if.end174: ; preds = %land.lhs.true161, %for.end159 + unreachable +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !2, i64 0} +!2 = !{!"int", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !3, i64 0} Index: llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-2.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/sink-load-with-memory-phi-2.ll @@ -0,0 +1,45 @@ 
+; RUN: opt -S -gvn-hoist < %s | FileCheck %s + +; CHECK: load +; CHECK-NOT: load +@reg_last_set_invalid = external global i8*, align 8 + +; Function Attrs: nounwind uwtable +define void @record_value_for_reg() { +entry: + br label %for.cond + +for.cond: ; preds = %entry + br label %for.cond139 + +for.cond139: ; preds = %if.end156, %for.cond + br label %for.body142 + +for.body142: ; preds = %for.cond139 + br i1 undef, label %land.lhs.true146, label %if.else + +land.lhs.true146: ; preds = %for.body142 + br i1 undef, label %if.then151, label %if.else + +if.then151: ; preds = %land.lhs.true146 + %0 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !1 + br label %if.end156 + +if.else: ; preds = %land.lhs.true146, %for.body142 + %1 = load i8*, i8** @reg_last_set_invalid, align 8, !tbaa !1 + br label %if.end156 + +if.end156: ; preds = %if.else, %if.then151 + %.sink5 = phi i8* [ %1, %if.else ], [ %0, %if.then151 ] + store i8 undef, i8* undef, align 1, !tbaa !5 + br label %for.cond139 +} + +!llvm.ident = !{!0} + +!0 = !{!"clang version 5.0.0 "} +!1 = !{!2, !2, i64 0} +!2 = !{!"any pointer", !3, i64 0} +!3 = !{!"omnipotent char", !4, i64 0} +!4 = !{!"Simple C/C++ TBAA"} +!5 = !{!3, !3, i64 0}