Index: llvm/include/llvm/Support/GenericDomTree.h =================================================================== --- llvm/include/llvm/Support/GenericDomTree.h +++ llvm/include/llvm/Support/GenericDomTree.h @@ -721,11 +721,12 @@ llvm::make_unique>(BB, IDomNode))).get(); } - NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } - void addRoot(NodeT *BB) { this->Roots.push_back(BB); } public: + + NodeT *getIDom(NodeT *BB) const { return IDoms.lookup(BB); } + /// updateDFSNumbers - Assign In and Out numbers to the nodes while walking /// dominator tree in dfs order. void updateDFSNumbers() const { Index: llvm/include/llvm/Support/GenericDomTreeConstruction.h =================================================================== --- llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -269,7 +269,7 @@ } // Free temporary memory used to construct idom's - DT.IDoms.clear(); + //DT.IDoms.clear(); DT.Info.clear(); DT.Vertex.clear(); DT.Vertex.shrink_to_fit(); Index: llvm/lib/Transforms/IPO/PassManagerBuilder.cpp =================================================================== --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -137,7 +137,7 @@ "(default = 75)")); static cl::opt EnableGVNHoist( - "enable-gvn-hoist", cl::init(false), cl::Hidden, + "enable-gvn-hoist", cl::init(true), cl::Hidden, cl::desc("Enable the GVN hoisting pass")); static cl::opt @@ -477,6 +477,10 @@ MPM.add(Inliner); Inliner = nullptr; } + // GVNHoist + if(EnableGVNHoist) + MPM.add(createGVNHoistPass()); + if (!DisableUnitAtATime) MPM.add(createPostOrderFunctionAttrsLegacyPass()); if (OptLevel > 2) Index: llvm/lib/Transforms/Scalar/GVNHoist.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVNHoist.cpp +++ llvm/lib/Transforms/Scalar/GVNHoist.cpp @@ -17,14 +17,57 @@ // is disabled in the following cases. // 1. 
Scalars across calls.
// 2. geps when corresponding load/store cannot be hoisted.
+//
+// TODO:
+// For -Oz scalars are always safe to hoist.
+// For -O2/-O3 hoist only when the live range improves or remains the same.
+// If we haven't computed dominator tree levels, do so now.
+// For the sink operation, anything after a hoist barrier is okay to sink,
+// but nothing before the hoist barrier. See Danny's patch on HoistBarriers.
+//
+// Find sink opportunities: sink instructions to a common post dominator when
+// they compute the same value and hoisting isn't profitable because that will
+// increase the liveness. In this case sinking may decrease the liveness when
+// the instruction can be delayed. But do not delay so much that the latency of
+// the operation takes over. For example, a divide may take ~10 cycles; if it is
+// sunk into the BB where it is used, its result may not be ready in time anyway.
+// So the estimated latency of the operation should be computed, and the
+// instruction should be sunk only to the point where its result is readily
+// available for its user.
+//
+// Sinking a store may not always be beneficial: sinking frees up a register and
+// so improves register allocation, but the stored result may be loaded again;
+// in that case it is better to store early (TODO: verify).
+//
+// TODO: Hoist from >2 successors. Currently GVNHoist will not hoist stores
+// in this case because it works on two instructions at a time. 
+// entry: +// switch i32 %c1, label %exit1 [ +// i32 0, label %sw0 +// i32 1, label %sw1 +// ] +// +// sw0: +// store i32 1, i32* @G +// br label %exit +// +// sw1: +// store i32 1, i32* @G +// br label %exit +// +// exit1: +// store i32 1, i32* @G +// ret void +// exit: +// ret void //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/MemorySSA.h" #include "llvm/Transforms/Utils/MemorySSAUpdater.h" @@ -34,6 +77,7 @@ #define DEBUG_TYPE "gvn-hoist" STATISTIC(NumHoisted, "Number of instructions hoisted"); +STATISTIC(NumSunk, "Number of instructions sunk"); STATISTIC(NumRemoved, "Number of instructions removed"); STATISTIC(NumLoadsHoisted, "Number of loads hoisted"); STATISTIC(NumLoadsRemoved, "Number of loads removed"); @@ -41,6 +85,7 @@ STATISTIC(NumStoresRemoved, "Number of stores removed"); STATISTIC(NumCallsHoisted, "Number of calls hoisted"); STATISTIC(NumCallsRemoved, "Number of calls removed"); +//STATISTIC(NumBarriers, "Number of barriers"); static cl::opt MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1), @@ -60,6 +105,9 @@ MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10), cl::desc("Maximum length of dependent chains to hoist " "(default = 10, unlimited = -1)")); +static cl::opt + CheckProfitability("gvn-hoist-check-profitability", cl::Hidden, cl::init(true), + cl::desc("Check for proitability (reducing register pressure)")); namespace llvm { @@ -73,13 +121,6 @@ // Returns true when A executes before B. 
bool operator()(const Instruction *A, const Instruction *B) const { - // FIXME: libc++ has a std::sort() algorithm that will call the compare - // function on the same element. Once PR20837 is fixed and some more years - // pass by and all the buildbots have moved to a corrected std::sort(), - // enable the following assert: - // - // assert(A != B); - const BasicBlock *BA = A->getParent(); const BasicBlock *BB = B->getParent(); unsigned ADFS, BDFS; @@ -114,6 +155,8 @@ VNtoScalars[{V, InvalidVN}].push_back(I); } + void clear() { VNtoScalars.clear(); } + const VNtoInsns &getVNTable() const { return VNtoScalars; } }; @@ -130,6 +173,8 @@ } } + void clear() { VNtoLoads.clear(); } + const VNtoInsns &getVNTable() const { return VNtoLoads; } }; @@ -149,6 +194,8 @@ VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store); } + void clear() { VNtoStores.clear(); } + const VNtoInsns &getVNTable() const { return VNtoStores; } }; @@ -175,6 +222,12 @@ VNtoCallsStores[Entry].push_back(Call); } + void clear() { + VNtoCallsScalars.clear(); + VNtoCallsLoads.clear(); + VNtoCallsStores.clear(); + } + const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; } const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; } @@ -185,6 +238,13 @@ typedef DenseMap BBSideEffectsSet; typedef SmallVector SmallVecInsn; typedef SmallVectorImpl SmallVecImplInsn; +typedef SmallSet SmallSetBB; +typedef DenseMap MergeSetT; +typedef SmallVector SmallVecBB; +typedef SmallVecBB BBLevelKeyT; +typedef std::map BBLevelT; +typedef DenseMap DomLevelsT; +typedef std::pair EdgeT; static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) { static const unsigned KnownIDs[] = { @@ -195,55 +255,123 @@ combineMetadata(ReplInst, I, KnownIDs); } +void printBBLevels(const BBLevelT &BBLevels) { + for (const std::pair &P : BBLevels) { + dbgs() << "\nLevel: " << P.first << "\n"; + for (const BasicBlock *BB : P.second) + dbgs() << *BB << "\n"; + } +} + +void printMergeSet(const 
MergeSetT &M) { + // For printing in a deterministic order. + typedef std::set SetConstBB; + std::map PrintM; + for (const std::pair &P : M) { + for (const BasicBlock *BB : P.second) + PrintM[P.first].insert(BB); + } + for (const std::pair &P : PrintM) { + dbgs() << "\nMergeSet of: " << P.first->getName() << ": "; + for (const BasicBlock *BB : P.second) + dbgs() << BB->getName() << ", "; + } +} + +void printJEdges(const DenseSet &Edges) { + // For printing in a deterministic order. + std::set PrintE(Edges.begin(), Edges.end()); + + for (const EdgeT &E : PrintE) + dbgs() << "\nFound a JEdge: " << E.first->getName() << " -> " + << E.second->getName(); +} + +void printSmallSet(SmallSetBB &S) { + dbgs() << "\nPrinting SmallSet: "; + for (const auto &BB : S) + dbgs() << BB->getName() << ","; +} + +static inline void updateLocalStats(const Instruction* Repl, unsigned &NI, unsigned &NL, unsigned &NS, unsigned &NC) { + if (isa(Repl)) + ++NL; + else if (isa(Repl)) + ++NS; + else if (isa(Repl)) + ++NC; + else // Scalar + ++NI; +} + +static inline void updateGlobalStats(unsigned NI, unsigned NL, unsigned NS, unsigned NC, unsigned NR) { + NumHoisted += NL + NS + NC + NI; + NumRemoved += NR; + NumLoadsHoisted += NL; + NumStoresHoisted += NS; + NumCallsHoisted += NC; +} + // This pass hoists common computations across branches sharing common // dominator. The primary goal is to reduce the code size, and in some // cases reduce critical path (by exposing more ILP). class GVNHoist { public: + // Each element of a hoisting list contains the basic block where to hoist and + // a list of instructions to be hoisted. 
+ typedef std::pair HoistingPointInfo; + typedef SmallVector HoistingPointList; + GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD, MemorySSA *MSSA) : DT(DT), AA(AA), MD(MD), MSSA(MSSA), MSSAUpdater(make_unique(MSSA)), - HoistingGeps(false), - HoistedCtr(0) - { } - - bool run(Function &F) { - VN.setDomTree(DT); - VN.setAliasAnalysis(AA); - VN.setMemDep(MD); - bool Res = false; - // Perform DFS Numbering of instructions. - unsigned BBI = 0; - for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { - DFSNumber[BB] = ++BBI; - unsigned I = 0; - for (auto &Inst : *BB) - DFSNumber[&Inst] = ++I; - } + HoistedCtr(0), HoistingGeps(false) { + clearVNTables(); + } - int ChainLength = 0; + void clearVNTables() { + II.clear(); + LI.clear(); + SI.clear(); + CI.clear(); + } - // FIXME: use lazy evaluation of VN to avoid the fix-point computation. - while (1) { - if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) - return Res; + // DomLevels maps from BB -> its depth from root. + // JEdges only contain the J edges as D edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the Basic Blocks at that level. + // DJ Graph is described in "Sreedhar, Vugranam C. Efficient program analysis + // using DJ graphs. McGill University, 1996". + void constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); + + // Return true if S1 is a subset of S2. + bool isSubset(const SmallSetBB &S1, const SmallSetBB &S2) { + if (S1.size() > S2.size()) + return false; + for (BasicBlock *BB : S1) { + if (!S2.count(BB)) + return false; + } + return true; + } - auto HoistStat = hoistExpressions(F); - if (HoistStat.first + HoistStat.second == 0) - return Res; + // DomLevels maps BB to its depth from root. + // JEdges only contain the J-edges as D-edges are available in Dominator Tree. + // BBLevels maps each depth in the CFG to all the BBs at that level. + // BILARDI, G. AND PINGALI, K. 2003. 
Algorithms for computing the static + // single assignment form. J. ACM 50, 3 (May), 375–425. + bool constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels); - if (HoistStat.second > 0) - // To address a limitation of the current GVN, we need to rerun the - // hoisting after we hoisted loads or stores in order to be able to - // hoist all scalars dependent on the hoisted ld/st. - VN.clear(); + // Returns true if the \p Op is the last use at I. + // TODO: Find O(1) algorithm for this. + const Instruction *lastUser(const Instruction *I, const Value *Val) const; - Res = true; - } + // Returns true if the \p Op is live-out from \p BB. + bool isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const; - return Res; - } + bool run(Function &F); private: GVN::ValueTable VN; @@ -252,31 +380,28 @@ MemoryDependenceResults *MD; MemorySSA *MSSA; std::unique_ptr MSSAUpdater; - const bool HoistingGeps; DenseMap DFSNumber; BBSideEffectsSet BBSideEffects; + MergeSetT MergeSet; + DenseSet HoistBarrier; + InsnInfo II; + LoadInfo LI; + StoreInfo SI; + CallInfo CI; int HoistedCtr; + const bool HoistingGeps; + //unsigned NumFuncArgs; + //DenseMap Rank; enum InsKind { Unknown, Scalar, Load, Store }; - // Return true when there are exception handling in BB. - bool hasEH(const BasicBlock *BB) { - auto It = BBSideEffects.find(BB); - if (It != BBSideEffects.end()) - return It->second; - - if (BB->isEHPad() || BB->hasAddressTaken()) { - BBSideEffects[BB] = true; - return true; - } - - if (BB->getTerminator()->mayThrow()) { - BBSideEffects[BB] = true; - return true; - } - - BBSideEffects[BB] = false; - return false; + // Return true when I1 appears before I2 in the instructions of BB. 
+ bool firstInBB(const Instruction *I1, const Instruction *I2) const {
+ assert(I1->getParent() == I2->getParent());
+ unsigned I1DFS = DFSNumber.lookup(I1);
+ unsigned I2DFS = DFSNumber.lookup(I2);
+ assert(I1DFS && I2DFS);
+ return I1DFS < I2DFS;
}
// Return true when a successor of BB dominates A.
@@ -288,693 +413,1331 @@
return false;
}
+ bool hoistCandidate(const User *U) const {
+ if (!VN.exists(const_cast(U))) // Only for scalars
+ return false;
+ unsigned V = VN.lookup(const_cast(U));
+
+ // Multiple scalars with same VN have very high chance of being hoisted.
+ if (II.getVNTable().count({V, InvalidVN}) > 1)
+ return true;
+
+ return false;
+ }
+
+ // Return true when there is exception handling in BB.
+ bool hasEH(const BasicBlock *BB);
+
+ // Return true when there is exception handling or loads of memory Def
+ // between Def and NewPt. This function is only called for stores: Def is
+ // the MemoryDef of the store to be hoisted.
+ //
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+ int &NBBsOnAllPaths);
+
+ // Return true when there is exception handling between HoistPt and BB.
+ // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
+ // return true when the counter NBBsOnAllPaths reaches 0, except when it is
+ // initialized to -1 which is unlimited.
+ bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB,
+ int &NBBsOnAllPaths);
+
// Return true when all paths from HoistBB to the end of the function pass
// through one of the blocks in WL.
bool hoistingFromAllPaths(const BasicBlock *HoistBB,
- SmallPtrSetImpl &WL) {
+ SmallPtrSetImpl &WL);
- // Copy WL as the loop will remove elements from it.
- SmallPtrSet WorkList(WL.begin(), WL.end());
+ // Return true when there are memory uses of Def in BB. 
+ bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB); - for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { - // There exists a path from HoistBB to the exit of the function if we are - // still iterating in DF traversal and we removed all instructions from - // the work list. - if (WorkList.empty()) - return false; + // Return true when it is safe to hoist scalar instructions from all blocks in + // WL to HoistBB. TODO: Hoisting scalars past a call only when the + // interference does not increase. + bool safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths); - const BasicBlock *BB = *It; - if (WorkList.erase(BB)) { - // Stop DFS traversal when BB is in the work list. - It.skipChildren(); - continue; - } + // Return true when it is safe to hoist a memory load or store U from OldPt + // to NewPt. + bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, + MemoryUseOrDef *MA, InsKind K, int &NBBsOnAllPaths); - // Check for end of function, calls that do not return, etc. - if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator())) - return false; + bool safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, const Instruction *Insn, + MemoryUseOrDef *MA, GVNHoist::InsKind K, + int &NumBBsOnAllPaths, const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, const BasicBlock *InsnBB, + SmallPtrSetImpl &WL); - // When reaching the back-edge of a loop, there may be a path through the - // loop that does not pass through B or C before exiting the loop. - if (successorDominate(BB, HoistBB)) - return false; + // Return true when it is profitable to hoist I1. + bool profitableToHoist(Instruction *I) const; - // Increment DFS traversal when not skipping children. - ++It; - } + // Return true when it is profitable to hoist I1. 
+ bool profitableToSink(Instruction *I) const; + + // Partition InstructionsToHoist into a set of candidates which can share a + // common hoisting point. The partitions are collected in HPL. IsScalar is + // true when the instructions in InstructionsToHoist are scalars. IsLoad is + // true when the InstructionsToHoist are loads, false when they are stores. + void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + HoistingPointList &HPL, InsKind K); + + bool safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K); + + // Initialize HPL from Map. + void findHoistableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + /*void getRanks(Function &F); + + void sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const;*/ + + // Find sinkable instructions. + void findSinkableInsn(const VNtoInsns &Map, HoistingPointList &HPL, + InsKind K); + + // Return true when all operands of Instr are available at insertion point + // HoistPt. When limiting the number of hoisted expressions, one could hoist + // a load without hoisting its access function. So before hoisting any + // expression, make sure that all its operands are available at insert point. + bool allOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) + return false; return true; } - /* Return true when I1 appears before I2 in the instructions of BB. */ - bool firstInBB(const Instruction *I1, const Instruction *I2) { - assert(I1->getParent() == I2->getParent()); - unsigned I1DFS = DFSNumber.lookup(I1); - unsigned I2DFS = DFSNumber.lookup(I2); - assert(I1DFS && I2DFS); - return I1DFS < I2DFS; + // Same as allOperandsAvailable with recursive check for GEP operands. + bool allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const; + + // Make all operands of the GEP available. 
+ void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const; + + // In the case Repl is a load or a store, we make all their GEPs + // available: GEPs are not hoisted by default to avoid the address + // computations to be hoisted without the associated load or store. + bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const; + + void setAlignment(Instruction *I, Instruction *Repl); + // Remove and rename all instructions other than Repl. + void removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl); + void removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc); + void removeMPhi(MemoryAccess *NewMemAcc); + std::pair hoist(HoistingPointList &HPL); + std::pair sink(HoistingPointList &HPL); + bool getVN(Instruction *I); + void barriersAndVNs(Function &F, bool Sinking); + // Hoist all expressions. Returns Number of scalars hoisted + // and number of non-scalars hoisted. + std::pair hoistExpressions(Function &F); + std::pair sinkExpressions(Function &F); +}; + +class GVNHoistLegacyPass : public FunctionPass { +public: + static char ID; + + GVNHoistLegacyPass() : FunctionPass(ID) { + initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); } - // Return true when there are memory uses of Def in BB. 
- bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, - const BasicBlock *BB) { - const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); - if (!Acc) + bool runOnFunction(Function &F) override { + if (skipFunction(F)) return false; + auto &DT = getAnalysis().getDomTree(); + auto &AA = getAnalysis().getAAResults(); + auto &MD = getAnalysis().getMemDep(); + auto &MSSA = getAnalysis().getMSSA(); - Instruction *OldPt = Def->getMemoryInst(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *NewBB = NewPt->getParent(); - bool ReachedNewPt = false; + GVNHoist G(&DT, &AA, &MD, &MSSA); + return G.run(F); + } - for (const MemoryAccess &MA : *Acc) - if (const MemoryUse *MU = dyn_cast(&MA)) { - Instruction *Insn = MU->getMemoryInst(); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + } +}; - // Do not check whether MU aliases Def when MU occurs after OldPt. - if (BB == OldBB && firstInBB(OldPt, Insn)) - break; +bool GVNHoist::hasEH(const BasicBlock *BB) { + auto It = BBSideEffects.find(BB); + if (It != BBSideEffects.end()) + return It->second; - // Do not check whether MU aliases Def when MU occurs before NewPt. - if (BB == NewBB) { - if (!ReachedNewPt) { - if (firstInBB(Insn, NewPt)) - continue; - ReachedNewPt = true; - } - } - if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) - return true; - } + if (BB->isEHPad() || BB->hasAddressTaken()) { + BBSideEffects[BB] = true; + return true; + } - return false; + if (BB->getTerminator()->mayThrow()) { + BBSideEffects[BB] = true; + return true; } - // Return true when there are exception handling or loads of memory Def - // between Def and NewPt. This function is only called for stores: Def is - // the MemoryDef of the store to be hoisted. 
+ BBSideEffects[BB] = false; + return false; +} - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaces 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, - int &NBBsOnAllPaths) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = Def->getBlock(); - assert(DT->dominates(NewBB, OldBB) && "invalid path"); - assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && - "def does not dominate new hoisting point"); - - // Walk all basic blocks reachable in depth-first iteration on the inverse - // CFG from OldBB to NewBB. These blocks are all the blocks that may be - // executed between the execution of NewBB and OldBB. Hoisting an expression - // from OldBB into NewBB has to be safe on all execution paths. - for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { - if (*I == NewBB) { - // Stop traversal when reaching HoistPt. - I.skipChildren(); - continue; - } +bool GVNHoist::hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def, + int &NBBsOnAllPaths) { + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = Def->getBlock(); + assert(DT->dominates(NewBB, OldBB) && "invalid path"); + assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) && + "def does not dominate new hoisting point"); + + // Walk all basic blocks reachable in depth-first iteration on the inverse + // CFG from OldBB to NewBB. These blocks are all the blocks that may be + // executed between the execution of NewBB and OldBB. Hoisting an expression + // from OldBB into NewBB has to be safe on all execution paths. + for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == NewBB) { + // Stop traversal when reaching HoistPt. + I.skipChildren(); + continue; + } - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - // Check that we do not move a store past loads. - if (hasMemoryUse(NewPt, Def, *I)) - return true; + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != OldBB) && HoistBarrier.count(BB)) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Check that we do not move a store past loads. + if (hasMemoryUse(NewPt, Def, BB)) + return true; - ++I; - } + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; - return false; + ++I; } - // Return true when there are exception handling between HoistPt and BB. - // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and - // return true when the counter NBBsOnAllPaths reaches 0, except when it is - // initialized to -1 which is unlimited. - bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB, - int &NBBsOnAllPaths) { - assert(DT->dominates(HoistPt, BB) && "Invalid path"); - - // Walk all basic blocks reachable in depth-first iteration on - // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the - // blocks that may be executed between the execution of NewHoistPt and - // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe - // on all execution paths. - for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) { - if (*I == HoistPt) { - // Stop traversal when reaching NewHoistPt. - I.skipChildren(); - continue; - } + return false; +} - // Stop walk once the limit is reached. 
- if (NBBsOnAllPaths == 0) - return true; +bool GVNHoist::hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *SrcBB, + int &NBBsOnAllPaths) { + assert(DT->dominates(HoistPt, SrcBB) && "Invalid path"); + + // Walk all basic blocks reachable in depth-first iteration on + // the inverse CFG from BBInsn to NewHoistPt. These blocks are all the + // blocks that may be executed between the execution of NewHoistPt and + // BBInsn. Hoisting an expression from BBInsn into NewHoistPt has to be safe + // on all execution paths. + for (auto I = idf_begin(SrcBB), E = idf_end(SrcBB); I != E;) { + const BasicBlock *BB = *I; + if (BB == HoistPt) { + // Stop traversal when reaching NewHoistPt. + I.skipChildren(); + continue; + } - // Impossible to hoist with exceptions on the path. - if (hasEH(*I)) - return true; + // Stop walk once the limit is reached. + if (NBBsOnAllPaths == 0) + return true; - // -1 is unlimited number of blocks on all paths. - if (NBBsOnAllPaths != -1) - --NBBsOnAllPaths; + // Impossible to hoist with exceptions on the path. + if (hasEH(BB)) + return true; - ++I; - } + // No such instruction after HoistBarrier in a basic block was + // selected for hoisting so instructions selected within basic block with + // a hoist barrier can be hoisted. + if ((BB != SrcBB) && HoistBarrier.count(BB)) + return true; - return false; + // -1 is unlimited number of blocks on all paths. + if (NBBsOnAllPaths != -1) + --NBBsOnAllPaths; + + ++I; } - // Return true when it is safe to hoist a memory load or store U from OldPt - // to NewPt. - bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt, - MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) { + return false; +} - // In place hoisting is safe. 
- if (NewPt == OldPt) - return true; +bool GVNHoist::hoistingFromAllPaths(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL) { - const BasicBlock *NewBB = NewPt->getParent(); - const BasicBlock *OldBB = OldPt->getParent(); - const BasicBlock *UBB = U->getBlock(); + // Copy WL as the loop will remove elements from it. + SmallPtrSet WorkList(WL.begin(), WL.end()); - // Check for dependences on the Memory SSA. - MemoryAccess *D = U->getDefiningAccess(); - BasicBlock *DBB = D->getBlock(); - if (DT->properlyDominates(NewBB, DBB)) - // Cannot move the load or store to NewBB above its definition in DBB. + for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) { + // There exists a path from HoistBB to the exit of the function if we are + // still iterating in DF traversal and we removed all instructions from + // the work list. + if (WorkList.empty()) return false; - if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D)) - if (auto *UD = dyn_cast(D)) - if (firstInBB(NewPt, UD->getMemoryInst())) - // Cannot move the load or store to NewPt above its definition in D. - return false; + const BasicBlock *BB = *It; + if (WorkList.erase(BB)) { + // Stop DFS traversal when BB is in the work list. + It.skipChildren(); + continue; + } - // Check for unsafe hoistings due to side effects. - if (K == InsKind::Store) { - if (hasEHOrLoadsOnPath(NewPt, dyn_cast(U), NBBsOnAllPaths)) - return false; - } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + // We reached the leaf Basic Block => not all paths have this instruction. + if (!BB->getTerminator()->getNumSuccessors()) + return false; + + // When reaching the back-edge of a loop, there may be a path through the + // loop that does not pass through B or C before exiting the loop. + if (successorDominate(BB, HoistBB)) return false; - if (UBB == NewBB) { - if (DT->properlyDominates(DBB, NewBB)) + // Increment DFS traversal when not skipping children. 
+ ++It; + } + + return true; +} + +bool GVNHoist::hasMemoryUse(const Instruction *NewPt, MemoryDef *Def, + const BasicBlock *BB) { + const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB); + if (!Acc) + return false; + + Instruction *OldPt = Def->getMemoryInst(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *NewBB = NewPt->getParent(); + bool ReachedNewPt = false; + + for (const MemoryAccess &MA : *Acc) + if (const MemoryUse *MU = dyn_cast(&MA)) { + Instruction *Insn = MU->getMemoryInst(); + + // Do not check whether MU aliases Def when MU occurs after OldPt. + if (BB == OldBB && firstInBB(OldPt, Insn)) + break; + + // Do not check whether MU aliases Def when MU occurs before NewPt. + if (BB == NewBB) { + if (!ReachedNewPt) { + if (firstInBB(Insn, NewPt)) + continue; + ReachedNewPt = true; + } + } + if (MemorySSAUtil::defClobbersUseOrDef(Def, MU, *AA)) return true; - assert(UBB == DBB); - assert(MSSA->locallyDominates(D, U)); } - // No side effects: it is safe to hoist. - return true; - } + return false; +} - // Return true when it is safe to hoist scalar instructions from all blocks in - // WL to HoistBB. - bool safeToHoistScalar(const BasicBlock *HoistBB, - SmallPtrSetImpl &WL, - int &NBBsOnAllPaths) { - // Check that the hoisted expression is needed on all paths. - if (!hoistingFromAllPaths(HoistBB, WL)) +bool GVNHoist::safeToHoistScalar(const BasicBlock *HoistBB, + SmallPtrSetImpl &WL, + int &NBBsOnAllPaths) { + // Check that the hoisted expression is needed on all paths. + if (!hoistingFromAllPaths(HoistBB, WL)) + return false; + + for (const BasicBlock *BB : WL) + if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) return false; - for (const BasicBlock *BB : WL) - if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths)) - return false; + return true; +} +bool GVNHoist::safeToHoistLdSt(const Instruction *NewPt, + const Instruction *OldPt, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NBBsOnAllPaths) { + + // In place hoisting is safe. 
+ if (NewPt == OldPt) return true; - } - // Each element of a hoisting list contains the basic block where to hoist and - // a list of instructions to be hoisted. - typedef std::pair HoistingPointInfo; - typedef SmallVector HoistingPointList; + const BasicBlock *NewBB = NewPt->getParent(); + const BasicBlock *OldBB = OldPt->getParent(); + const BasicBlock *MABB = MA->getBlock(); - // Partition InstructionsToHoist into a set of candidates which can share a - // common hoisting point. The partitions are collected in HPL. IsScalar is - // true when the instructions in InstructionsToHoist are scalars. IsLoad is - // true when the InstructionsToHoist are loads, false when they are stores. - void partitionCandidates(SmallVecImplInsn &InstructionsToHoist, - HoistingPointList &HPL, InsKind K) { - // No need to sort for two instructions. - if (InstructionsToHoist.size() > 2) { - SortByDFSIn Pred(DFSNumber); - std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); - } + // Check for dependences on the Memory SSA. + MemoryAccess *Def = MA->getDefiningAccess(); + BasicBlock *DefBB = Def->getBlock(); + if (DT->properlyDominates(NewBB, DefBB)) + // Cannot move the load or store to NewBB above its definition in DBB. + return false; - int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + if (NewBB == DefBB && !MSSA->isLiveOnEntryDef(Def)) + if (auto *UD = dyn_cast(Def)) + if (firstInBB(NewPt, UD->getMemoryInst())) + // Cannot move the load or store to NewPt above its definition in D. 
+ return false; - SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); - SmallVecImplInsn::iterator Start = II; - Instruction *HoistPt = *II; - BasicBlock *HoistBB = HoistPt->getParent(); - MemoryUseOrDef *UD; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(HoistPt); - - for (++II; II != InstructionsToHoist.end(); ++II) { - Instruction *Insn = *II; - BasicBlock *BB = Insn->getParent(); - BasicBlock *NewHoistBB; - Instruction *NewHoistPt; - - if (BB == HoistBB) { // Both are in the same Basic Block. - NewHoistBB = HoistBB; - NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; - } else { - // If the hoisting point contains one of the instructions, - // then hoist there, otherwise hoist before the terminator. - NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB); - if (NewHoistBB == BB) - NewHoistPt = Insn; - else if (NewHoistBB == HoistBB) - NewHoistPt = HoistPt; - else - NewHoistPt = NewHoistBB->getTerminator(); - } + // Check for unsafe hoistings due to side effects. + if (K == InsKind::Store) { + if (hasEHOrLoadsOnPath(NewPt, dyn_cast(MA), NBBsOnAllPaths)) + return false; + } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths)) + return false; - SmallPtrSet WL; - WL.insert(HoistBB); - WL.insert(BB); + if (MABB == NewBB) { + if (DT->properlyDominates(DefBB, NewBB)) + return true; + assert(MABB == DefBB); + assert(MSSA->locallyDominates(Def, MA)); + } - if (K == InsKind::Scalar) { - if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; - } - } else { - // When NewBB already contains an instruction to be hoisted, the - // expression is needed on all paths. - // Check that the hoisted expression is needed on all paths: it is - // unsafe to hoist loads to a place where there may be a path not - // loading from the same address: for instance there may be a branch on - // which the address of the load may not be initialized. 
- if ((HoistBB == NewHoistBB || BB == NewHoistBB || - hoistingFromAllPaths(NewHoistBB, WL)) && - // Also check that it is safe to move the load or store from HoistPt - // to NewHoistPt, and from Insn to NewHoistPt. - safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NumBBsOnAllPaths) && - safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), - K, NumBBsOnAllPaths)) { - // Extend HoistPt to NewHoistPt. - HoistPt = NewHoistPt; - HoistBB = NewHoistBB; - continue; + // No side effects: it is safe to hoist. + return true; +} + + +bool GVNHoist::safeToHoistLdSt_0(const Instruction *NewHoistPt, + const Instruction *HoistPt, + const Instruction *Insn, MemoryUseOrDef *MA, + GVNHoist::InsKind K, int &NumBBsOnAllPaths, + const BasicBlock *HoistPtBB, + const BasicBlock *NewHoistBB, + const BasicBlock *InsnBB, + SmallPtrSetImpl &WL) { + return (HoistPtBB == NewHoistBB || InsnBB == NewHoistBB || + hoistingFromAllPaths(NewHoistBB, WL)) && + // Also check that it is safe to move the load or store from HoistPt + // to NewHoistPt, and from Insn to NewHoistPt. Note that HoistPt may + // not be the instruction to be hoisted, it is a transient placeholder + // to find the farthest hoisting point when >2 hoistable candidates + // can be hoisted to a common dominator. + safeToHoistLdSt(NewHoistPt, HoistPt, MA, K, NumBBsOnAllPaths) && + safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn), K, + NumBBsOnAllPaths); +} + +bool GVNHoist::profitableToHoist(Instruction *I) const { + if (!CheckProfitability) + return true; + // For -O3/-O2 hoist only when the liveness decreases i.e., no more than + // one operand can be a use without kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return true; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. 
+ for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + //if (isa(Op)) + // continue; + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + return false; + // It is always profitable to hoist when the liveness does not increase, + // a Kill will compensate for the def created by this instruction. + const Instruction *LU = lastUser(I, Op); + if (LU == I) + return true; + else { + // We optimistically assume that if all the users of Op are hoistable + // candidates then it is profitable to hoist. + bool stillProfitable = true; + for (const User *U : Op->users()) { + if (!hoistCandidate(U)) { + stillProfitable = false; + break; } + DEBUG(dbgs() << "\nstill hoistable:" << *U); } + if (stillProfitable) + return true; + } + } + return false; +} + +bool GVNHoist::profitableToSink(Instruction *I) const { + // For -O3/-O2 sink only when the liveness decreases i.e., no more than + // one operand can be a kill. + // Store and Calls do not create a register def. + if (isa(I) || isa(I)) + return false; + + // If Op is a kill then it will not be live-out from its basic block + // but the reverse is not true. + unsigned NumKills = 0; + for (unsigned op = 0, e = I->getNumOperands(); op != e; ++op) { + Value *Op = I->getOperand(op); + // It is always profitable to sink when the liveness does not increase. + if (isLiveOutUsingMergeSet(I->getParent(), Op)) + continue; + // A Kill will increase the liveness during a sink. + const Instruction *LU = lastUser(I, Op); + if (LU == I && ++NumKills > 1) + return false; + } + return true; +} + +void GVNHoist::partitionCandidates(SmallVecImplInsn &InstructionsToHoist, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // No need to sort for two instructions. 
+ if (InstructionsToHoist.size() > 2) { + SortByDFSIn Pred(DFSNumber); + std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred); + } - // At this point it is not safe to extend the current hoisting to - // NewHoistPt: save the hoisting list so far. - if (std::distance(Start, II) > 1) - HPL.push_back({HoistBB, SmallVecInsn(Start, II)}); - - // Start over from BB. - Start = II; - if (K != InsKind::Scalar) - UD = MSSA->getMemoryAccess(*Start); - HoistPt = Insn; - HoistBB = BB; - NumBBsOnAllPaths = MaxNumberOfBBSInPath; + int NumBBsOnAllPaths = MaxNumberOfBBSInPath; + + SmallVecImplInsn::iterator II = InstructionsToHoist.begin(); + SmallVecImplInsn::iterator Start = II; + Instruction *HoistPt = *II; + BasicBlock *HoistPtBB = HoistPt->getParent(); + MemoryUseOrDef *MA; + if (K != InsKind::Scalar) + MA = MSSA->getMemoryAccess(HoistPt); + + for (++II; II != InstructionsToHoist.end(); ++II) { + Instruction *Insn = *II; + BasicBlock *InsnBB = Insn->getParent(); + BasicBlock *NewHoistBB; + Instruction *NewHoistPt; + + if (InsnBB == HoistPtBB) { // Both are in the same Basic Block. + NewHoistBB = HoistPtBB; + NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt; + } else { + // If the hoisting point contains one of the instructions, + // then hoist there, otherwise hoist before the terminator. + NewHoistBB = DT->findNearestCommonDominator(HoistPtBB, InsnBB); + if (NewHoistBB == InsnBB) + NewHoistPt = Insn; + else if (NewHoistBB == HoistPtBB) + NewHoistPt = HoistPt; + else + NewHoistPt = NewHoistBB->getTerminator(); } - // Save the last partition. + SmallPtrSet WL; + WL.insert(HoistPtBB); + WL.insert(InsnBB); + + // When NewBB already contains an instruction to be hoisted, the + // expression is needed on all paths. 
+ // Check that the hoisted expression is needed on all paths: it is
+ // unsafe to hoist loads to a place where there may be a path not
+ // loading from the same address: for instance there may be a branch on
+ // which the address of the load may not be initialized.
+ if (K == InsKind::Scalar) {
+ if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths) &&
+ profitableToHoist(Insn)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistPtBB = NewHoistBB;
+ continue;
+ }
+ } else {
+ if (safeToHoistLdSt_0(NewHoistPt, HoistPt, Insn, MA, K, NumBBsOnAllPaths,
+ HoistPtBB, NewHoistBB, InsnBB, WL) &&
+ // Hoist loads when hoisting to pred BB even if liveness increases.
+ (profitableToHoist(Insn) || InsnBB->getSinglePredecessor() == NewHoistBB)) {
+ // Extend HoistPt to NewHoistPt.
+ HoistPt = NewHoistPt;
+ HoistPtBB = NewHoistBB;
+ continue;
+ }
+ }
+
+ // At this point it is not safe to extend the current hoisting to
+ // NewHoistPt: save the hoisting list so far.
 if (std::distance(Start, II) > 1)
-      HPL.push_back({HoistBB, SmallVecInsn(Start, II)});
+      HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)});
+
+    // Start over from BB.
+    Start = II;
+    if (K != InsKind::Scalar)
+      MA = MSSA->getMemoryAccess(*Start);
+    HoistPt = Insn;
+    HoistPtBB = InsnBB;
+    NumBBsOnAllPaths = MaxNumberOfBBSInPath;
   }
 
-  // Initialize HPL from Map.
-  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
-                              InsKind K) {
-    for (const auto &Entry : Map) {
-      if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
-        return;
-
-      const SmallVecInsn &V = Entry.second;
-      if (V.size() < 2)
-        continue;
+  // Save the last partition.
+  if (std::distance(Start, II) > 1)
+    HPL.push_back({HoistPtBB, SmallVecInsn(Start, II)});
+}
 
-      // Compute the insertion point and the list of expressions to be hoisted. 
- SmallVecInsn InstructionsToHoist; - for (auto I : V) - if (!hasEH(I->getParent())) - InstructionsToHoist.push_back(I); +// Returns true when the instruction does not result +// in a hazard with any other instruction till the end of BB. +bool GVNHoist::safeToSinkToEnd(Instruction *I, BasicBlock *BB, GVNHoist::InsKind K) { + BasicBlock::iterator II(I); + auto IMA = MSSA->getMemoryAccess(I); + assert (IMA); + if (MemoryUseOrDef *I0UD = cast(IMA)) { + MemoryAccess *Def = I0UD->getDefiningAccess(); + + // Updating Memory PHI is tricky, bail out for now. + for (User *U : Def->users()) + if (isa(U)) + return false; + } - if (!InstructionsToHoist.empty()) - partitionCandidates(InstructionsToHoist, HPL, K); + while (++II != BB->end()) { // Skip I + auto IIMA = MSSA->getMemoryAccess(&*II); + // TODO: Bails out on any memory writes for now, improve for non-aliases. + if (IIMA) { + if (!isa(IIMA)) + return false; } } + return true; +} - // Return true when all operands of Instr are available at insertion point - // HoistPt. When limiting the number of hoisted expressions, one could hoist - // a load without hoisting its access function. So before hoisting any - // expression, make sure that all its operands are available at insert point. - bool allOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) - return false; +void GVNHoist::findHoistableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + for (const auto &Entry : Map) { + if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold) + return; - return true; + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Compute the insertion point and the list of expressions to be hoisted. 
+ SmallVecInsn InstructionsToHoist; + for (auto I : V) { + // We don't need to check for hoist-barriers here because if + // I->getParent() has a barrier then I precedes the barrier. + if (!hasEH(I->getParent())) + InstructionsToHoist.push_back(I); + } + + if (!InstructionsToHoist.empty()) + partitionCandidates(InstructionsToHoist, HPL, K); } +} - // Same as allOperandsAvailable with recursive check for GEP operands. - bool allGepOperandsAvailable(const Instruction *I, - const BasicBlock *HoistPt) const { - for (const Use &Op : I->operands()) - if (const auto *Inst = dyn_cast(&Op)) - if (!DT->dominates(Inst->getParent(), HoistPt)) { - if (const GetElementPtrInst *GepOp = - dyn_cast(Inst)) { - if (!allGepOperandsAvailable(GepOp, HoistPt)) - return false; - // Gep is available if all operands of GepOp are available. - } else { - // Gep is not available if it has operands other than GEPs that are - // defined in blocks not dominating HoistPt. - return false; - } - } - return true; +/*void GVNHoist::getRanks(Function &F) { + // Prefer undef to anything else + if (isa(V)) + return 0; + // Global is also a Constant. + if (isa(V)) + return 1; + else if (auto *A = dyn_cast(V)) + return 2 + A->getArgNo(); + if (Rank.count(V)) + return Rank[V]; + if (Instruction *I = dyn_cast(V)) { + unsigned R = 0; + for (Value *V1 : I->operand_values()) { + unsigned R1 = getRanks(V1); + R = std::max(R, R1); + } + R = 3 + NumFuncArgs + IRank; + Rank[V] = R + 1; + return R; } + // Return a really large number. + return std::numeric_limits::max()/2; +} - // Make all operands of the GEP available. 
- void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist, - Instruction *Gep) const { - assert(allGepOperandsAvailable(Gep, HoistPt) && - "GEP operands not available"); +void GVNHoist::sortByRank(SmallVectorImpl &V, const VNtoInsns &Map) const { + +}*/ + +void GVNHoist::findSinkableInsn(const VNtoInsns &Map, + GVNHoist::HoistingPointList &HPL, + GVNHoist::InsKind K) { + // Sort the VNs by rank, higher ranked should be sunk first. + //SmallVector SortedRank; + //sortByRank(SortedRank, Map); + for (const auto &Entry : Map) { + const SmallVecInsn &V = Entry.second; + if (V.size() < 2) + continue; + + // Only sink to common post-dom + SmallVecInsn InstructionsToSink; + for (Instruction *I : V) { + auto BB = I->getParent(); + // All the sinkable instructions are collected after any barrier in + // a basic block so no need to check for barrier BBs. + + // The instruction should have only one user i.e., PHI in the Succ + // TODO: Handle store, calls etc. without users + unsigned NumUsers = std::distance(I->user_begin(), I->user_end()); + if (NumUsers != 1) + continue; - Instruction *ClonedGep = Gep->clone(); - for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) - if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { + // The only user should be a PHI. + if(!isa(I->user_back())) + continue; - // Check whether the operand is already available. - if (DT->dominates(Op->getParent(), HoistPt)) + // Sink to single successor. + if (auto Succ = BB->getSingleSuccessor()) { + if (I->user_back()->getParent() != Succ) + continue; // PHI should be in the immediate successor. + // The successor should have exactly two predecessors. + unsigned NumPreds = std::distance(pred_begin(Succ), pred_end(Succ)); + if (NumPreds != 2) continue; - // As a GEP can refer to other GEPs, recursively make all the operands - // of this GEP available at HoistPt. 
- if (GetElementPtrInst *GepOp = dyn_cast(Op)) - makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); + // Sinkable instruction found, check for safety and profitability. + // As the instructions are from a basic block without barriers, we can + // sink this instruction as long as there is no hazard. + if (K != InsKind::Scalar && !safeToSinkToEnd(I, BB, K)) + continue; + if (!profitableToSink(I)) + continue; + + // We don't need to check for barriers here because the instruction + // will be sunk at the beginning of a basic block i.e., before any + // barrier, and I is after any barriers in I->getParent(). + if (!hasEH(Succ)) + InstructionsToSink.push_back(I); + } + } + + //assert(InstructionsToSink.size() < 3 && "Test case"); + // Sort V such that adjacent Instructions share common PDom. + if (InstructionsToSink.size() != 2) + continue; + //assert(InstructionsToSink.size() != 2 && "Test case"); + // There should be a unique PHI using I0 and I1 to be legally sinkable. + if (InstructionsToSink[0]->user_back() == InstructionsToSink[1]->user_back()) { + auto Succ0 = InstructionsToSink[0]->getParent()->getSingleSuccessor(); + auto Succ1 = InstructionsToSink[1]->getParent()->getSingleSuccessor(); + assert(Succ0 == Succ1); + HPL.push_back({Succ0, InstructionsToSink}); + } + } +} + +bool GVNHoist::allGepOperandsAvailable(const Instruction *I, + const BasicBlock *HoistPt) const { + for (const Use &Op : I->operands()) + if (const auto *Inst = dyn_cast(&Op)) + if (!DT->dominates(Inst->getParent(), HoistPt)) { + if (const GetElementPtrInst *GepOp = + dyn_cast(Inst)) { + if (!allGepOperandsAvailable(GepOp, HoistPt)) + return false; + // Gep is available if all operands of GepOp are available. + } else { + // Gep is not available if it has operands other than GEPs that are + // defined in blocks not dominating HoistPt. + return false; + } } + return true; +} - // Copy Gep and replace its uses in Repl with ClonedGep. 
- ClonedGep->insertBefore(HoistPt->getTerminator()); +void GVNHoist::makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist, + Instruction *Gep) const { + assert(allGepOperandsAvailable(Gep, HoistPt) && "GEP operands not available"); - // Conservatively discard any optimization hints, they may differ on the - // other paths. - ClonedGep->dropUnknownNonDebugMetadata(); + Instruction *ClonedGep = Gep->clone(); + for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i) + if (Instruction *Op = dyn_cast(Gep->getOperand(i))) { - // If we have optimization hints which agree with each other along different - // paths, preserve them. - for (const Instruction *OtherInst : InstructionsToHoist) { - const GetElementPtrInst *OtherGep; - if (auto *OtherLd = dyn_cast(OtherInst)) - OtherGep = cast(OtherLd->getPointerOperand()); - else - OtherGep = cast( - cast(OtherInst)->getPointerOperand()); - ClonedGep->andIRFlags(OtherGep); + // Check whether the operand is already available. + if (DT->dominates(Op->getParent(), HoistPt)) + continue; + + // As a GEP can refer to other GEPs, recursively make all the operands + // of this GEP available at HoistPt. + if (GetElementPtrInst *GepOp = dyn_cast(Op)) + makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp); } - // Replace uses of Gep with ClonedGep in Repl. - Repl->replaceUsesOfWith(Gep, ClonedGep); + // Copy Gep and replace its uses in Repl with ClonedGep. + ClonedGep->insertBefore(HoistPt->getTerminator()); + + // Conservatively discard any optimization hints, they may differ on the + // other paths. + ClonedGep->dropUnknownNonDebugMetadata(); + + // If we have optimization hints which agree with each other along different + // paths, preserve them. 
+ for (const Instruction *OtherInst : InstructionsToHoist) { + const GetElementPtrInst *OtherGep; + if (auto *OtherLd = dyn_cast(OtherInst)) + OtherGep = cast(OtherLd->getPointerOperand()); + else + OtherGep = cast( + cast(OtherInst)->getPointerOperand()); + ClonedGep->andIRFlags(OtherGep); } - // In the case Repl is a load or a store, we make all their GEPs - // available: GEPs are not hoisted by default to avoid the address - // computations to be hoisted without the associated load or store. - bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt, - const SmallVecInsn &InstructionsToHoist) const { - // Check whether the GEP of a ld/st can be synthesized at HoistPt. - GetElementPtrInst *Gep = nullptr; - Instruction *Val = nullptr; - if (auto *Ld = dyn_cast(Repl)) { - Gep = dyn_cast(Ld->getPointerOperand()); - } else if (auto *St = dyn_cast(Repl)) { - Gep = dyn_cast(St->getPointerOperand()); - Val = dyn_cast(St->getValueOperand()); - // Check that the stored value is available. - if (Val) { - if (isa(Val)) { - // Check whether we can compute the GEP at HoistPt. - if (!allGepOperandsAvailable(Val, HoistPt)) - return false; - } else if (!DT->dominates(Val->getParent(), HoistPt)) + // Replace uses of Gep with ClonedGep in Repl. + Repl->replaceUsesOfWith(Gep, ClonedGep); +} + +bool GVNHoist::makeGepOperandsAvailable( + Instruction *Repl, BasicBlock *HoistPt, + const SmallVecInsn &InstructionsToHoist) const { + // Check whether the GEP of a ld/st can be synthesized at HoistPt. + GetElementPtrInst *Gep = nullptr; + Instruction *Val = nullptr; + if (auto *Ld = dyn_cast(Repl)) { + Gep = dyn_cast(Ld->getPointerOperand()); + } else if (auto *St = dyn_cast(Repl)) { + Gep = dyn_cast(St->getPointerOperand()); + Val = dyn_cast(St->getValueOperand()); + // Check that the stored value is available. + if (Val) { + if (isa(Val)) { + // Check whether we can compute the GEP at HoistPt. 
+ if (!allGepOperandsAvailable(Val, HoistPt)) return false; - } + } else if (!DT->dominates(Val->getParent(), HoistPt)) + return false; } + } - // Check whether we can compute the Gep at HoistPt. - if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) - return false; + // Check whether we can compute the Gep at HoistPt. + if (!Gep || !allGepOperandsAvailable(Gep, HoistPt)) + return false; - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep); - if (Val && isa(Val)) - makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); + if (Val && isa(Val)) + makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val); - return true; + return true; +} + +void GVNHoist::setAlignment(Instruction *I, Instruction *Repl) { + if (auto *ReplacementLoad = dyn_cast(Repl)) { + ReplacementLoad->setAlignment( + std::min(ReplacementLoad->getAlignment(), + cast(I)->getAlignment())); + ++NumLoadsRemoved; + } else if (auto *ReplacementStore = dyn_cast(Repl)) { + ReplacementStore->setAlignment( + std::min(ReplacementStore->getAlignment(), + cast(I)->getAlignment())); + ++NumStoresRemoved; + } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { + ReplacementAlloca->setAlignment( + std::max(ReplacementAlloca->getAlignment(), + cast(I)->getAlignment())); + } else if (isa(Repl)) { + ++NumCallsRemoved; } +} - std::pair hoist(HoistingPointList &HPL) { - unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; - for (const HoistingPointInfo &HP : HPL) { - // Find out whether we already have one of the instructions in HoistPt, - // in which case we do not have to move it. 
- BasicBlock *HoistPt = HP.first; - const SmallVecInsn &InstructionsToHoist = HP.second; - Instruction *Repl = nullptr; - for (Instruction *I : InstructionsToHoist) - if (I->getParent() == HoistPt) - // If there are two instructions in HoistPt to be hoisted in place: - // update Repl to be the first one, such that we can rename the uses - // of the second based on the first. - if (!Repl || firstInBB(I, Repl)) - Repl = I; - - // Keep track of whether we moved the instruction so we know whether we - // should move the MemoryAccess. - bool MoveAccess = true; - if (Repl) { - // Repl is already in HoistPt: it remains in place. - assert(allOperandsAvailable(Repl, HoistPt) && - "instruction depends on operands that are not available"); - MoveAccess = false; - } else { - // When we do not find Repl in HoistPt, select the first in the list - // and move it to HoistPt. - Repl = InstructionsToHoist.front(); - - // We can move Repl in HoistPt only when all operands are available. - // The order in which hoistings are done may influence the availability - // of operands. - if (!allOperandsAvailable(Repl, HoistPt)) { - - // When HoistingGeps there is nothing more we can do to make the - // operands available: just continue. - if (HoistingGeps) - continue; +void GVNHoist::removeAndReplace(Instruction *I, Instruction *Repl, MemoryAccess *NewMemAcc) { + setAlignment(I, Repl); + if (NewMemAcc) { + // Update the uses of the old MSSA access with NewMemAcc. + MemoryAccess *OldMA = MSSA->getMemoryAccess(I); + OldMA->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMA); + } - // When not HoistingGeps we need to copy the GEPs. - if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) - continue; - } + Repl->andIRFlags(I); + combineKnownMetadata(Repl, I); + I->replaceAllUsesWith(Repl); + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I); + I->eraseFromParent(); +} - // Move the instruction at the end of HoistPt. 
- Instruction *Last = HoistPt->getTerminator(); - MD->removeInstruction(Repl); - Repl->moveBefore(Last); +void GVNHoist::removeAndReplace(const SmallVecInsn &InstructionsToHoist, + Instruction *Repl) { + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + // Remove and rename all other instructions. + for (Instruction *I : InstructionsToHoist) + if (I != Repl) { + removeAndReplace(I, Repl, NewMemAcc); + } +} - DFSNumber[Repl] = DFSNumber[Last]++; +void GVNHoist::removeMPhi(MemoryAccess *NewMemAcc) { + // Remove MemorySSA phi nodes with the same arguments. + SmallPtrSet UsePhis; + for (User *U : NewMemAcc->users()) + if (MemoryPhi *Phi = dyn_cast(U)) + UsePhis.insert(Phi); + + for (auto *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); + } + } +} + +std::pair +GVNHoist::hoist(GVNHoist::HoistingPointList &HPL) { + unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0; + for (const HoistingPointInfo &HP : HPL) { + // Find out whether we already have one of the instructions in HoistPt, + // in which case we do not have to move it. + BasicBlock *HoistPt = HP.first; + const SmallVecInsn &InstructionsToHoist = HP.second; + Instruction *Repl = nullptr; + for (Instruction *I : InstructionsToHoist) + if (I->getParent() == HoistPt) + // If there are two instructions in HoistPt to be hoisted in place: + // update Repl to be the first one, such that we can rename the uses + // of the second based on the first. + if (!Repl || firstInBB(I, Repl)) + Repl = I; + + // Keep track of whether we moved the instruction so we know whether we + // should move the MemoryAccess. + bool MoveAccess = true; + if (Repl) { + // Repl is already in HoistPt: it remains in place. 
+ assert(allOperandsAvailable(Repl, HoistPt) && + "instruction depends on operands that are not available"); + MoveAccess = false; + } else { + // When we do not find Repl in HoistPt, select the first in the list + // and move it to HoistPt. + Repl = InstructionsToHoist.front(); + + // We can move Repl in HoistPt only when all operands are available. + // The order in which hoistings are done may influence the availability + // of operands. + if (!allOperandsAvailable(Repl, HoistPt)) { + + // When HoistingGeps there is nothing more we can do to make the + // operands available: just continue. + if (HoistingGeps) + continue; + + // When not HoistingGeps we need to copy the GEPs. + if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist)) + continue; } - MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); - - if (MoveAccess) { - if (MemoryUseOrDef *OldMemAcc = - dyn_cast_or_null(NewMemAcc)) { - // The definition of this ld/st will not change: ld/st hoisting is - // legal when the ld/st is not moved past its current definition. - MemoryAccess *Def = OldMemAcc->getDefiningAccess(); - NewMemAcc = - MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End); - OldMemAcc->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMemAcc); - } + // Move the instruction at the end of HoistPt. + Instruction *Last = HoistPt->getTerminator(); + MD->removeInstruction(Repl); + Repl->moveBefore(Last); + + DFSNumber[Repl] = DFSNumber[Last]++; + } + + MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl); + + if (MoveAccess) { + if (MemoryUseOrDef *OldMemAcc = + dyn_cast_or_null(NewMemAcc)) { + // The definition of this ld/st will not change: ld/st hoisting is + // legal when the ld/st is not moved past its current definition. 
+ MemoryAccess *Def = OldMemAcc->getDefiningAccess(); + NewMemAcc = MSSAUpdater->createMemoryAccessInBB(Repl, Def, HoistPt, + MemorySSA::End); + OldMemAcc->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(OldMemAcc); } + } - if (isa(Repl)) - ++NL; - else if (isa(Repl)) - ++NS; - else if (isa(Repl)) - ++NC; - else // Scalar - ++NI; - - // Remove and rename all other instructions. - for (Instruction *I : InstructionsToHoist) - if (I != Repl) { - ++NR; - if (auto *ReplacementLoad = dyn_cast(Repl)) { - ReplacementLoad->setAlignment( - std::min(ReplacementLoad->getAlignment(), - cast(I)->getAlignment())); - ++NumLoadsRemoved; - } else if (auto *ReplacementStore = dyn_cast(Repl)) { - ReplacementStore->setAlignment( - std::min(ReplacementStore->getAlignment(), - cast(I)->getAlignment())); - ++NumStoresRemoved; - } else if (auto *ReplacementAlloca = dyn_cast(Repl)) { - ReplacementAlloca->setAlignment( - std::max(ReplacementAlloca->getAlignment(), - cast(I)->getAlignment())); - } else if (isa(Repl)) { - ++NumCallsRemoved; - } + updateLocalStats(Repl, NI, NL, NS, NC); + removeAndReplace(InstructionsToHoist, Repl); + // All except one i.e., Repl are removed. + NR += InstructionsToHoist.size() -1; - if (NewMemAcc) { - // Update the uses of the old MSSA access with NewMemAcc. - MemoryAccess *OldMA = MSSA->getMemoryAccess(I); - OldMA->replaceAllUsesWith(NewMemAcc); - MSSAUpdater->removeMemoryAccess(OldMA); - } + if (NewMemAcc) + removeMPhi(NewMemAcc); + } - Repl->andIRFlags(I); - combineKnownMetadata(Repl, I); - I->replaceAllUsesWith(Repl); - // Also invalidate the Alias Analysis cache. - MD->removeInstruction(I); - I->eraseFromParent(); - } + updateGlobalStats(NI, NL, NS, NC, NR); + return {NI, NL + NC + NS}; +} - // Remove MemorySSA phi nodes with the same arguments. 
- if (NewMemAcc) {
-      SmallPtrSet<MemoryPhi *, 4> UsePhis;
-      for (User *U : NewMemAcc->users())
-        if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
-          UsePhis.insert(Phi);
-
-      for (auto *Phi : UsePhis) {
-        auto In = Phi->incoming_values();
-        if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
-          Phi->replaceAllUsesWith(NewMemAcc);
-          MSSAUpdater->removeMemoryAccess(Phi);
-        }
+// Update MemorySSA if mem-refs are sunk.
+std::pair<unsigned, unsigned>
+GVNHoist::sink(GVNHoist::HoistingPointList &HPL) {
+  unsigned NI = 0, NL = 0, NS = 0, NC = 0;
+  for (const HoistingPointInfo &HP : HPL) {
+    BasicBlock *SinkPt = HP.first;
+    const SmallVecInsn &InstructionsToHoist = HP.second;
+    assert(InstructionsToHoist.size() == 2);
+    Instruction* I0 = InstructionsToHoist[0];
+    Instruction* I1 = InstructionsToHoist[1];
+
+    // We need to check availability for only I0 as I1 will be removed. TODO:
+    // Additionally, we can still sink if the operand not available is also
+    // getting sunk with available operands.
+    if (!allOperandsAvailable(I0, SinkPt))
+      continue;
+    // Keep I0 and remove I1 and PN
+    PHINode *PN = cast<PHINode>(I0->user_back());
+    MemoryAccess *I0MemAccess = MSSA->getMemoryAccess(I0);
+    DEBUG(dbgs() << "\nSinking:" << *I0 << *I1 << *PN);
+
+    if (I0MemAccess) {
+      // This MSSA update is only for mem-refs with one use in a PHI in Succ BB. 
+ MemoryUseOrDef *I0UD = cast(I0MemAccess); + //MSSAUpdater->moveToPlace(I0UD, PN->getParent(), MemorySSA::Beginning); + MemoryAccess *Def = I0UD->getDefiningAccess(); + + assert(none_of(Def->users(), [](User *U) { return isa(U); })); + /*SmallPtrSet UsePhis; + for (User *U : Def->users()) + if (MemoryPhi *Phi = dyn_cast(U)) + UsePhis.insert(Phi);*/ + + MemoryAccess *NewMemAcc = MSSAUpdater->createMemoryAccessInBB(I0, Def, SinkPt, + MemorySSA::Beginning); + + MemoryAccess *I1MemAccess = MSSA->getMemoryAccess(I1); + I0MemAccess->replaceAllUsesWith(NewMemAcc); + I1MemAccess->replaceAllUsesWith(NewMemAcc); + + MSSAUpdater->removeMemoryAccess(I0MemAccess); + MSSAUpdater->removeMemoryAccess(I1MemAccess); + + + /*for (auto *Phi : UsePhis) { + auto In = Phi->incoming_values(); + if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) { + Phi->replaceAllUsesWith(NewMemAcc); + MSSAUpdater->removeMemoryAccess(Phi); } - } + }*/ } - NumHoisted += NL + NS + NC + NI; - NumRemoved += NR; - NumLoadsHoisted += NL; - NumStoresHoisted += NS; - NumCallsHoisted += NC; - return {NI, NL + NC + NS}; + setAlignment(I1, I0); + I0->andIRFlags(I1); + combineKnownMetadata(I0, I1); + // I1->replaceAllUsesWith(I0); Not required because I1 used once in PHI. + // Also invalidate the Alias Analysis cache. + MD->removeInstruction(I1); + MD->removeInstruction(PN); + PN->replaceAllUsesWith(I0); + // FIXME: Update Memory SSA + // Update the uses of the old MSSA access with I0. + // Remove MSSA for I1 + // Remove MSSA for PN, and replace with I0 + // Combine metadata of I0 and I1 + // Memory PHI? + //assert(0); + bool found = false; + auto It = SinkPt->begin(); + while (It != SinkPt->end()) { // Find the last PHI to insert I0 + if (!isa(It)) + break; + if (PN == &*It) + found = true; + ++It; + } + assert(found && "PHI replaced in wrong BB"); + I0->moveBefore(*SinkPt, It); + PN->eraseFromParent(); + I1->eraseFromParent(); + + NumSunk++; } + return {NI, NL + NC + NS}; +} - // Hoist all expressions. 
Returns Number of scalars hoisted - // and number of non-scalars hoisted. - std::pair hoistExpressions(Function &F) { - InsnInfo II; - LoadInfo LI; - StoreInfo SI; - CallInfo CI; - for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { - int InstructionNb = 0; - for (Instruction &I1 : *BB) { - // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting - // deeper may increase the register pressure and compilation time. - if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) - break; +bool GVNHoist::getVN(Instruction *I) { + // Do not value number terminator instructions. + if (isa(I)) + return false; - // Do not value number terminator instructions. - if (isa(&I1)) - break; + if (auto *Load = dyn_cast(I)) + LI.insert(Load, VN); + else if (auto *Store = dyn_cast(I)) + SI.insert(Store, VN); + else if (auto *Call = dyn_cast(I)) { + if (auto *Intr = dyn_cast(Call)) { + if (isa(Intr) || + Intr->getIntrinsicID() == Intrinsic::assume) + return true; + } + if (Call->mayHaveSideEffects()) + return false; - if (auto *Load = dyn_cast(&I1)) - LI.insert(Load, VN); - else if (auto *Store = dyn_cast(&I1)) - SI.insert(Store, VN); - else if (auto *Call = dyn_cast(&I1)) { - if (auto *Intr = dyn_cast(Call)) { - if (isa(Intr) || - Intr->getIntrinsicID() == Intrinsic::assume) - continue; - } - if (Call->mayHaveSideEffects()) - break; + if (Call->isConvergent()) + return false; + + CI.insert(Call, VN); + } else if (HoistingGeps || !isa(I)) + // Do not hoist scalars past calls that may write to memory because + // that could result in spills later. geps are handled separately. + // TODO: We can relax this for targets like AArch64 as they have more + // registers than X86. 
+ II.insert(I, VN); + + return true; +} + +void GVNHoist::barriersAndVNs(Function &F, bool Sinking) { + for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { + int InstructionNb = 0; + bool BarrierFound = false; + SmallVector, 4> CurrentBBVNs; + for (Instruction &I1 : *BB) { + // If I1 cannot guarantee progress, subsequent instructions + // in BB cannot be hoisted anyways. + // TODO: For sink operation anything after hoist barrier is okay to sink + // but nothing before the hoist barrier. + if (!isGuaranteedToTransferExecutionToSuccessor(&I1)) { + //NumBarriers++; + HoistBarrier.insert(BB); + BarrierFound = true; + break; + } + + // Only hoist the first instructions in BB up to MaxDepthInBB. Hoisting + // deeper may increase the register pressure and compilation time. + if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB) + break; - if (Call->isConvergent()) + if (!getVN(&I1)) + break; + } + if (Sinking && BarrierFound) { + // TODO: clear VNs for this basicblock. + // Instructions after the last barriers can be sunk. + for (BasicBlock::reverse_iterator It = BB->rbegin(), Ie = BB->rend(); + It != Ie; ++It) { + if (!isGuaranteedToTransferExecutionToSuccessor(&*It)) + break; + if (!getVN(&*It)) break; + //assert(0); + } + } + } +} + +std::pair GVNHoist::hoistExpressions(Function &F) { + barriersAndVNs(F, false); + HoistingPointList HPL; + findHoistableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(LI.getVNTable(), HPL, InsKind::Load); + findHoistableInsn(SI.getVNTable(), HPL, InsKind::Store); + findHoistableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findHoistableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findHoistableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return hoist(HPL); +} + +std::pair GVNHoist::sinkExpressions(Function &F) { + barriersAndVNs(F, false); // First clear VNs before barrier in that BB. see TODO then set it to true. 
+ HoistingPointList HPL; + findSinkableInsn(II.getVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(LI.getVNTable(), HPL, InsKind::Load); + findSinkableInsn(SI.getVNTable(), HPL, InsKind::Store); + findSinkableInsn(CI.getScalarVNTable(), HPL, InsKind::Scalar); + findSinkableInsn(CI.getLoadVNTable(), HPL, InsKind::Load); + findSinkableInsn(CI.getStoreVNTable(), HPL, InsKind::Store); + return sink(HPL); +} + +void GVNHoist::constructDJGraph(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + for (auto DFI = df_begin(DT->getRootNode()), DFE = df_end(DT->getRootNode()); + DFI != DFE; ++DFI) { + // Since getPathLength is inclusive of both the terminal nodes + // i.e., Entry and *DFI so decrease by 1. + unsigned Depth = DFI.getPathLength() - 1; + BasicBlock *BB = (*DFI)->getBlock(); + DomLevels[BB] = Depth; + BBLevels[Depth].push_back(BB); + for (BasicBlock *Succ : BB->getTerminator()->successors()) + if (!DT->properlyDominates(BB, Succ)) { + JEdges.insert(std::make_pair(BB, Succ)); + } + } +} - CI.insert(Call, VN); - } else if (HoistingGeps || !isa(&I1)) - // Do not hoist scalars past calls that may write to memory because - // that could result in spills later. geps are handled separately. - // TODO: We can relax this for targets like AArch64 as they have more - // registers than X86. - II.insert(&I1, VN); +bool GVNHoist::constructMergeSet(DomLevelsT &DomLevels, DenseSet &JEdges, + BBLevelT &BBLevels) { + bool Repeat = false; + DenseSet VisJEdges; // Visited J Edges. + unsigned PrevLev = 0; + for (std::pair &P : BBLevels) { + assert(PrevLev <= P.first); + PrevLev = P.first; + for (BasicBlock *CurrBB : P.second) { + for (auto PB = pred_begin(CurrBB), PE = pred_end(CurrBB); PB != PE; + ++PB) { + EdgeT Edge(*PB, CurrBB); // For all incoming edges to CurrBB. 
+ if (JEdges.count(Edge) && !VisJEdges.count(Edge)) { + VisJEdges.insert(Edge); // Visit + BasicBlock *Src = Edge.first; + BasicBlock *Dst = Edge.second; + BasicBlock *INode = nullptr; + MergeSet[Dst].insert(Dst); // The target of JEdge. + while (DomLevels[Src] >= DomLevels[Dst]) { // A backedge. + DEBUG(dbgs() << "\nVisiting: " << Src->getName() << " -> " + << Dst->getName()); + // Merge (tmp) = Merge (tmp) U Merge (tnode) U { tnode } + // MergeSet(tnode) contains tnode. + MergeSet[Src].insert(MergeSet[Dst].begin(), MergeSet[Dst].end()); + INode = Src; + DEBUG(dbgs() << "IDom of " << Src->getName() << " is "); + Src = DT->getIDom(Src); + DEBUG(dbgs() << Src->getName()); + } + for (auto PINode = pred_begin(INode), PENode = pred_end(INode); + PINode != PENode; ++PINode) { // INode is an ancestor of SNode. + EdgeT Edge(*PINode, INode); + if (VisJEdges.count(Edge)) { + assert(JEdges.count(Edge)); + BasicBlock *SNode = *PINode; + // Check inconsistency, MergeSet[Dest] subset of MergeSet[Src] + if (!isSubset(MergeSet[INode], MergeSet[SNode])) + Repeat = true; + } + } + } } } + } + return Repeat; +} - HoistingPointList HPL; - computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load); - computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store); - computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar); - computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load); - computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store); - return hoist(HPL); +const Instruction *GVNHoist::lastUser(const Instruction *I, + const Value *Val) const { + // TODO: Make isLiveOutUsingMergeSet take const parameters. 
+ assert(!isLiveOutUsingMergeSet(const_cast(I->getParent()), + const_cast(Val))); + const BasicBlock *BB = I->getParent(); + BasicBlock::const_iterator BI(I), BE = BB->end(); + unsigned ICount = std::distance(BI, BE); + if (Val->getNumUses() <= ICount) { // Iterate on uses + for (const User *U : Val->users()) { + const Instruction *UserI = cast(U); + if (UserI != I && UserI->getParent() == BB) { + if (firstInBB(I, UserI)) // I precedes another Use => not a kill. + return UserI; + } + } + return I; } -}; + // else Iterate on Instructions + for (++BI; BI != BE; ++BI) { + for (unsigned i = 0; i < BI->getNumOperands(); ++i) + if (BI->getOperandUse(i).get() == Val) + return &*BI; + } + return I; +} -class GVNHoistLegacyPass : public FunctionPass { -public: - static char ID; +bool GVNHoist::isLiveOutUsingMergeSet(BasicBlock *BB, Value *Val) const { + assert(BB); + const BasicBlock *ValDefBB = nullptr; // BasicBlock defining Val + if (Instruction *I = dyn_cast(Val)) + ValDefBB = I->getParent(); + // FIXME! + // We are assuming when DefBB is not defined then the value is a parameter. + + // Case when Val is defined in BB, if any of the use is outside BB (DefBB) + // then it must be live-out. 
+ if (ValDefBB == BB) + for (User *U : Val->users()) { + if (cast(U)->getParent() != BB) + return true; + } - GVNHoistLegacyPass() : FunctionPass(ID) { - initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry()); + // Mr(n) = M(n) U {n}; Create a new set from the merge set + // Ms(n) = Ms(n) U Mr(w); For each successor w of BB + SmallSetBB Ms; // Ms = null-set + for (BasicBlock *Succ : BB->getTerminator()->successors()) { + Ms.insert(Succ); // Mr(Succ) = Succ U M(Succ) + for (BasicBlock *BB : MergeSet.lookup(Succ)) + Ms.insert(BB); // M(Succ) } - bool runOnFunction(Function &F) override { - if (skipFunction(F)) - return false; - auto &DT = getAnalysis().getDomTree(); - auto &AA = getAnalysis().getAAResults(); - auto &MD = getAnalysis().getMemDep(); - auto &MSSA = getAnalysis().getMSSA(); + // Iterate over all the uses of Val, if any of its users is in the mergeset + // of \p BB then Val is LiveOut from BB. + for (User *U : Val->users()) { + BasicBlock *UserDefBB = nullptr; + if (Instruction *I = dyn_cast(U)) + UserDefBB = I->getParent(); + else // Assuming live-out conservatively, the user can be a global + // TODO: maybe return false is okay?? + return true; //llvm_unreachable("User is not an instruction."); + while (UserDefBB && (UserDefBB != ValDefBB)) { + if (Ms.count(UserDefBB)) // if t /\ Ms(n) then return true; + return true; + UserDefBB = DT->getIDom(UserDefBB); + } + } + return false; +} - GVNHoist G(&DT, &AA, &MD, &MSSA); - return G.run(F); +bool GVNHoist::run(Function &F) { + //NumFuncArgs = F.arg_size(); + DT->updateDFSNumbers(); + DomLevelsT DomLevels; + DenseSet JEdges; + BBLevelT BBLevels; + constructDJGraph(DomLevels, JEdges, BBLevels); + // printBBLevels(BBLevels); + DEBUG(printJEdges(JEdges)); + while (constructMergeSet(DomLevels, JEdges, BBLevels)) + ; + DEBUG(printMergeSet(MergeSet)); + VN.setDomTree(DT); + VN.setAliasAnalysis(AA); + VN.setMemDep(MD); + bool Res = false; + // Perform DFS Numbering of instructions. 
+ unsigned BBI = 0; + for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) { + DFSNumber[BB] = ++BBI; + unsigned I = 0; + for (auto &Inst : *BB) + DFSNumber[&Inst] = ++I; } - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addPreserved(); + int ChainLength = 0; + + // FIXME: use lazy evaluation of VN to avoid the fix-point computation. + while (1) { + if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength) + return Res; + clearVNTables(); + auto HoistStat = hoistExpressions(F); + if (HoistStat.first + HoistStat.second == 0) + break; + + if (HoistStat.second > 0) + // To address a limitation of the current GVN, we need to rerun the + // hoisting after we hoisted loads or stores in order to be able to + // hoist all scalars dependent on the hoisted ld/st. + VN.clear(); + + Res = true; } -}; -} // namespace + + clearVNTables(); + sinkExpressions(F); + return Res; +} + +} // namespace llvm PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) { DominatorTree &DT = AM.getResult(F); Index: llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect-das-taco.ll @@ -0,0 +1,62 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; Testcase taken from: +; Das, Dibyendu, B. Dupont De Dinechin, and Ramakrishna Upadrasta. "Efficient +; liveness computation using merge sets and dj-graphs." ACM Transactions on +; Architecture and Code Optimization (TACO) 8.4 (2012): 27. 
+ +; check the J edges +; CHECK: Found a JEdge: bb4 -> bb5 +; CHECK: Found a JEdge: bb5 -> bb6 +; CHECK: Found a JEdge: bb6 -> bb5 +; CHECK: Found a JEdge: bb7 -> bb2 +; CHECK: Found a JEdge: bb9 -> bb6 +; CHECK: Found a JEdge: bb10 -> bb8 + +; check the mergesets +; CHECK: MergeSet of: bb2: bb2, +; CHECK: MergeSet of: bb3: bb2, +; CHECK: MergeSet of: bb4: bb2, bb5, bb6, +; CHECK: MergeSet of: bb8: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb5: bb2, bb5, bb6, +; CHECK: MergeSet of: bb6: bb2, bb5, bb6, +; CHECK: MergeSet of: bb7: bb2, +; CHECK: MergeSet of: bb9: bb2, bb8, bb5, bb6, +; CHECK: MergeSet of: bb10: bb2, bb8, bb5, bb6, + + +define void @irreducible_loop() { +bb1: +br label %bb2 + +bb2: + br i1 undef, label %bb3, label %bb11 + +bb3: + br i1 undef, label %bb4, label %bb8 + +bb4: + br label %bb5 + +bb5: + br label %bb6 + +bb6: + br i1 undef, label %bb7, label %bb5 + +bb7: + br label %bb2 + +bb8: + br label %bb9 + +bb9: + br i1 undef, label %bb6, label %bb10 + +bb10: + br label %bb8 + +bb11: + ret void +} \ No newline at end of file Index: llvm/test/Transforms/GVNHoist/dj-edge-detect.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/GVNHoist/dj-edge-detect.ll @@ -0,0 +1,107 @@ +; RUN: opt -S -gvn-hoist -debug < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; check the J edges +; CHECK: Found a JEdge: bb2 -> bb3 +; CHECK: Found a JEdge: bb18 -> bb6 +; CHECK: Found a JEdge: bb15 -> bb18 +; CHECK: Found a JEdge: bb8 -> bb18 +; CHECK: Found a JEdge: bb10 -> bb18 +; CHECK: Found a JEdge: bb13 -> bb18 + + +; check the mergesets +; CHECK: MergeSet of: bb3: bb3, +; CHECK: MergeSet of: bb2: bb3, +; CHECK: MergeSet of: bb6: bb3, bb6, +; CHECK: MergeSet of: bb18: bb3, bb6, bb18, +; CHECK: MergeSet of: bb15: bb3, bb6, bb18, +; CHECK: MergeSet of: bb8: bb3, bb6, bb18, +; CHECK: MergeSet of: bb10: bb3, bb6, bb18, +; CHECK: MergeSet of: bb13: bb3, bb6, bb18, + +%0 = type { %1*, %8* } +%1 = type { %2*, %3* } +%2 = 
type { i32, i32, i32, i32, i32*, i8*, i32 } +%3 = type { %4, i32, i32, %7*, i8, i8, i8, i8, i8 } +%4 = type { i8, i32, i8*, %5 } +%5 = type { %6 } +%6 = type { i16 } +%7 = type { i8*, i8*, i8*, i8*, i8* } +%8 = type <{ i8*, i8*, i32, %9, %17*, %19* }> +%9 = type { %10*, i8*, i8*, i8* (i8*)*, i32* (i8*)*, i8*, %15, i32, i32, i8*, %16*, i32, i8, i8*, i8, i8* } +%10 = type { i8*, %11*, i8*, %12, %13*, i8*, i32 (i8*)*, i32, i8, %14 } +%11 = type { i8*, i32 (i8*, %12*, i8*)*, i32 (i8*)*, i32 (i8*, i32)*, %16* (i8*)*, i32 (i8*)*, i32 (i8*)*, i32 (...)*, i32 (...)*, i32 (...)*, void (i8*, i32)*, void (i8*, i32, i8*, i32)*, i32 (i8*, i32, i8*)* } +%12 = type { i8*, i32 (i8*, i32, i8*, i32, i8)*, i32 (i8*, i32, i8*)*, void (i8*, i8*, i32)*, i32 (i8*)*, i32 (i8*, i32, i32)* } +%13 = type { i32 (i8*, i8*, i32)*, i32 (i8*, i32, i32)* } +%14 = type { i32, i32, i8, i8, i8 } +%15 = type { i32, i32, i32 } +%16 = type { i16, i16, i32, i32 } +%17 = type { i8*, i8*, i16 (i8*, i8*, i16)*, i32 (i8*, i8, i32)*, i32 (%16*, i32)*, %18, i32, i32 } +%18 = type { i8*, i8* (i8*, i8*, i32)* } +%19 = type { %20, i32, i32 } +%20 = type { i32, i32, [8 x i8], i8, i8, i16 } + +@global = external hidden unnamed_addr constant [10 x i8], align 1 + +define void @ham(i8* nocapture readnone %arg) local_unnamed_addr { +bb: + %tmp = alloca %0*, align 8 + %tmp1 = bitcast %0** %tmp to i8* + br label %bb3 + +bb2: ; preds = %bb6 + br label %bb3 + +bb3: ; preds = %bb2, %bb + %tmp4 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp5 = call i32 @quux(i8* %tmp1) + br label %bb6 + +bb6: ; preds = %bb18, %bb3 + %tmp7 = phi i32 [ %tmp5, %bb3 ], [ %tmp19, %bb18 ] + switch i32 %tmp7, label %bb15 [ + i32 0, label %bb2 + i32 35, label %bb8 + i32 11, label %bb10 + i32 12, label %bb13 + ] + +bb8: ; preds = %bb6 + %tmp9 = load %0*, %0** %tmp, align 8, !tbaa !0 + br label %bb18 + +bb10: ; preds = %bb6 + %tmp11 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp12 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb13: ; preds = %bb6 
+ %tmp14 = call i32 @quux(i8* %tmp1) + br label %bb18 + +bb15: ; preds = %bb6 + %tmp16 = load %0*, %0** %tmp, align 8, !tbaa !0 + %tmp17 = call i32 @quux(i8* null) + br label %bb18 + +bb18: ; preds = %bb15, %bb13, %bb10, %bb8 + %tmp19 = phi i32 [ %tmp17, %bb15 ], [ %tmp14, %bb13 ], [ %tmp12, %bb10 ], [ 0, %bb8 ] + br label %bb6 +} + +; Function Attrs: argmemonly nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +declare i32 @zot(%0**) + +declare i32 @quux(i8*) + +declare i32 @wombat(i8* nocapture readonly, ...) + +attributes #0 = { argmemonly nounwind } + +!0 = !{!1, !1, i64 0} +!1 = !{!"any pointer", !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} Index: llvm/test/Transforms/GVNHoist/hoist-mssa.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-mssa.ll +++ llvm/test/Transforms/GVNHoist/hoist-mssa.ll @@ -1,7 +1,9 @@ ; RUN: opt -S -gvn-hoist < %s | FileCheck %s -; Check that store hoisting works: there should be only one store left. -; CHECK-LABEL: @getopt +; Check that store hoisting works: there should be only one store hoisted +; because load cannot be hoisted as it is not profitable. +; CHECK-LABEL: @getopt( +; CHECK: store i32 ; CHECK: store i32 ; CHECK-NOT: store i32 @@ -47,6 +49,51 @@ ret void } +; Check that store hoisting works: there should be only one store left because +; load should also be hoisted in this case. 
+; CHECK-LABEL: @getopt_hoist_load( +; CHECK: store i32 +; CHECK-NOT: store i32 +define void @getopt_hoist_load(i32* %p1) { +bb: + br label %bb1 + +bb1: ; preds = %bb + br i1 undef, label %bb2, label %bb3 + +bb2: ; preds = %bb1 + br label %bb13 + +bb3: ; preds = %bb1 + br i1 undef, label %bb4, label %bb9 + +bb4: ; preds = %bb3 + %tmp = load i32, i32* @optind, align 4 + br i1 undef, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %tmp6 = add nsw i32 %tmp, 1 + store i32 %tmp6, i32* %p1, align 4 + br label %bb12 + +bb7: ; preds = %bb4 + %tmp8 = add nsw i32 %tmp, 1 + store i32 %tmp8, i32* %p1, align 4 + br label %bb13 + +bb9: ; preds = %bb3 + %tmp10 = load i32, i32* @optind, align 4 + %tmp11 = add nsw i32 %tmp10, 1 + store i32 %tmp11, i32* %p1, align 4 + br label %bb12 + +bb12: ; preds = %bb9, %bb5 + br label %bb13 + +bb13: ; preds = %bb12, %bb7, %bb2 + ret void +} + @GlobalVar = internal global float 1.000000e+00 ; Check that we hoist stores and remove the MSSA phi node. Index: llvm/test/Transforms/GVNHoist/hoist-very-busy.ll =================================================================== --- llvm/test/Transforms/GVNHoist/hoist-very-busy.ll +++ llvm/test/Transforms/GVNHoist/hoist-very-busy.ll @@ -32,3 +32,24 @@ declare void @longjmp(%struct.__jmp_buf_tag*, i32) #0 attributes #0 = { noreturn nounwind } + +; Check that the call and fcmp are hoisted. +; CHECK-LABEL: define void @fun( +; CHECK: store +; CHECK-NOT: store + +define void @fun() { +entry: + br label %if.then + +if.then: ; preds = %entry + br i1 undef, label %sw0, label %sw1 + +sw0: + store i32 1, i32* @G + unreachable + +sw1: + store i32 1, i32* @G + ret void +}