Index: llvm/include/llvm/Analysis/IteratedDominanceFrontier.h =================================================================== --- llvm/include/llvm/Analysis/IteratedDominanceFrontier.h +++ llvm/include/llvm/Analysis/IteratedDominanceFrontier.h @@ -23,6 +23,8 @@ #ifndef LLVM_ANALYSIS_IDF_H #define LLVM_ANALYSIS_IDF_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -96,5 +98,39 @@ }; typedef IDFCalculator ForwardIDFCalculator; typedef IDFCalculator, true> ReverseIDFCalculator; + +/// Build the merge sets for a given dominator tree. The merge set of a node N +/// is the set of nodes that may need a PHI node for a definition in N. Based on +/// the algorithm from +/// +/// Dibyendu Das and U. Ramakrishna. 2005. A practical and fast iterative +// algorithm for φ-function computation using DJ graphs. +// ACM Trans. Program. Lang. Syst. 27, 3 (May 2005), 426-440. +// +/// It has been modified to not explicitly use the DJ graph data structure. +class MergeSets { + DominatorTree &DT; + + size_t NumBBs; + DenseMap Sets; + DenseMap DFSToBB; + + BitVector &getOrInsert(DomTreeNode *N) { + auto I = Sets.insert({N, BitVector(NumBBs)}); + if (I.second) + DFSToBB[N->getDFSNumIn()] = N->getBlock(); + return I.first->second; + } + + void dumpSet(DomTreeNode *N); + +public: + MergeSets(DominatorTree &DT) : DT(DT) {} + + /// Get the merge set for nodes \p DefBBs + SmallVector get(const SmallPtrSetImpl &DefBBs); + + void build(BasicBlock *Root); +}; } #endif Index: llvm/lib/Analysis/IteratedDominanceFrontier.cpp =================================================================== --- llvm/lib/Analysis/IteratedDominanceFrontier.cpp +++ llvm/lib/Analysis/IteratedDominanceFrontier.cpp @@ -13,7 +13,9 @@ #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/PredIteratorCache.h" #include +#include namespace llvm { @@ -106,4 +108,97 @@ template class IDFCalculator; template class IDFCalculator, true>; + +struct PairHasher { + std::size_t operator()(const std::pair &Key) const { + return Key.first ^ + (Key.second + (Key.first << 6) + (Key.first >> 2) + 0x9e3779b9); + } +}; +void MergeSets::build(BasicBlock *Root) { + DT.updateDFSNumbers(); + // FIXME: Currently the BitVectors are twice as big as they need to be, as + // currently DFSNumIn are used as index. + NumBBs = DT.getRootNode()->getDFSNumOut() + 1; + + PredIteratorCache PredCache; + + bool Done = false; + while (!Done) { + Done = true; + std::unordered_set, PairHasher> Visited; + std::queue WorkQueue; + WorkQueue.push(DT.getNode(Root)); + + while (!WorkQueue.empty()) { + DomTreeNode *N = WorkQueue.front(); + WorkQueue.pop(); + for (auto *C : N->getChildren()) + WorkQueue.push(C); + + getOrInsert(N); + for (BasicBlock *P : PredCache.get(N->getBlock())) { + auto *PDom = DT.getNode(P); + if (!PDom || Visited.count({PDom->getDFSNumIn(), N->getDFSNumIn()}) || + DT.properlyDominates(PDom, N)) { + continue; + } + + Visited.insert({PDom->getDFSNumIn(), N->getDFSNumIn()}); + DomTreeNode *Tmp = PDom; + DomTreeNode *LNode = nullptr; + while (Tmp->getLevel() >= N->getLevel()) { + getOrInsert(Tmp); + getOrInsert(N); + auto &TmpS = getOrInsert(Tmp); + auto &S2 = getOrInsert(N); + TmpS |= S2; + TmpS.set(N->getDFSNumIn()); + LNode = Tmp; + Tmp = Tmp->getIDom(); + } + for (BasicBlock *Src : PredCache.get(LNode->getBlock())) { + auto *SrcD = DT.getNode(Src); + if (!SrcD || + !Visited.count({SrcD->getDFSNumIn(), LNode->getDFSNumIn()}) || + DT.properlyDominates(SrcD, N)) + continue; + + unsigned SrcDCount = getOrInsert(SrcD).count(); + BitVector Both = getOrInsert(SrcD); + Both |= getOrInsert(LNode); + if (Both.count() == SrcDCount) + continue; + + Done = false; + break; + } + } + } + } +} + +SmallVector +MergeSets::get(const SmallPtrSetImpl &DefBBs) { + SmallVector Result; + BitVector M(NumBBs); + for (auto *BB : DefBBs) { + assert(DT.isReachableFromEntry(BB) && "Node not reachable."); + assert(Sets.count(DT.getNode(BB)) && "node not seen."); + M |= Sets[DT.getNode(BB)]; + } + + for (unsigned I : M.set_bits()) + Result.push_back(DFSToBB[I]); + return Result; +} + +void MergeSets::dumpSet(DomTreeNode *N) { + SmallPtrSet S; + S.insert(N->getBlock()); + dbgs() << " MS for " << N->getBlock()->getName() << " : "; + for (auto *BB : get(S)) + dbgs() << BB->getName() << " "; + dbgs() << "\n"; +} } Index: llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp =================================================================== --- llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -25,7 +25,6 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" @@ -44,7 +43,9 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/IR/User.h" +#include "llvm/IR/Verifier.h" #include "llvm/Support/Casting.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" #include #include @@ -548,12 +549,15 @@ void PromoteMem2Reg::run() { Function &F = *DT.getRoot()->getParent(); + /* if (F.getName() == "gen_pic_list_from_frame_list")*/ + // F.getParent()->dump(); + MergeSets MS(DT); + MS.build(&F.getEntryBlock()); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; - ForwardIDFCalculator IDF(DT); for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -620,7 +624,9 @@ // Unique the set of defining blocks for efficient lookup. SmallPtrSet DefBlocks; - DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); + for (auto *BB : Info.DefiningBlocks) + if (DT.isReachableFromEntry(BB)) + DefBlocks.insert(BB); // Determine which blocks the value is live in. These are blocks which lead // to uses. @@ -631,18 +637,39 @@ // the standard SSA construction algorithm. Determine which blocks need phi // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - IDF.setLiveInBlocks(LiveInBlocks); - IDF.setDefiningBlocks(DefBlocks); - SmallVector PHIBlocks; - IDF.calculate(PHIBlocks); + auto PHIBlocks = MS.get(DefBlocks); if (PHIBlocks.size() > 1) llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { return BBNumbers.lookup(A) < BBNumbers.lookup(B); }); unsigned CurrentVersion = 0; - for (BasicBlock *BB : PHIBlocks) + unsigned NumAdded = 0; + for (BasicBlock *BB : PHIBlocks) { + if (!LiveInBlocks.count(BB)) + continue; + NumAdded++; QueuePhiNode(BB, AllocaNum, CurrentVersion); + } + +#ifndef NDEBUG + // Verify that we added the same number of PHI nodes using merge sets as + // with the iterated dominance frontier. + ForwardIDFCalculator IDF(DT); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector IDFPHIBlocks IDF.calculate(IDFPHIBlocks); + + // Make sure both approaches result in the same number of PHIs placed. + assert(NumAdded == IDFPHIBlocks.size() && "Number of blocks mismatched"); + + SmallPtrSet PlacedBlockSet; + PlacedBlockSet.insert(PHIBlocks.begin(), PHIBlocks.end()); + + // Check all PHI blocks computed by IDF are also found using merge sets. + for (auto *BB : IDFPHIBlocks) + assert(S1.count(BB) && "missed a block"); +#endif } if (Allocas.empty()) @@ -783,6 +810,10 @@ } NewPhiNodes.clear(); + /* if (verifyFunction(F, &dbgs())) {*/ + // F.dump(); + // assert(false); + /*}*/ } /// Determine which blocks the value is live in. Index: llvm/test/Transforms/Mem2Reg/merge-sets.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/Mem2Reg/merge-sets.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -mem2reg | FileCheck %s + +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[IF_END:%.*]] +; CHECK: if.end: +; CHECK-NEXT: br i1 undef, label [[WHILE_COND:%.*]], label [[IF_END46:%.*]] +; CHECK: while.cond: +; CHECK-NEXT: [[TOP_IDX_0:%.*]] = phi i32 [ undef, [[IF_END]] ], [ [[INC16:%.*]], [[WHILE_BODY:%.*]] ] +; CHECK-NEXT: br label [[LOR_RHS:%.*]] +; CHECK: lor.rhs: +; CHECK-NEXT: br i1 undef, label [[WHILE_BODY]], label [[IF_END46]] +; CHECK: while.body: +; CHECK-NEXT: [[INC16]] = add nsw i32 undef, 1 +; CHECK-NEXT: br label [[WHILE_COND]] +; CHECK: if.end46: +; CHECK-NEXT: [[TOP_IDX_1:%.*]] = phi i32 [ [[TOP_IDX_0]], [[LOR_RHS]] ], [ undef, [[IF_END]] ] +; CHECK-NEXT: br label [[WHILE_COND49:%.*]] +; CHECK: while.cond49: +; CHECK-NEXT: [[IDXPROM97:%.*]] = sext i32 [[TOP_IDX_1]] to i64 +; CHECK-NEXT: br label [[WHILE_COND49]] +; +entry: + %top_idx = alloca i32, align 4 + br label %if.end + +if.end: ; preds = %entry + br i1 undef, label %while.cond, label %if.end46 + +while.cond: ; preds = %while.body, %if.end + br label %lor.rhs + +lor.rhs: ; preds = %while.cond + br i1 undef, label %while.body, label %if.end46 + +while.body: ; preds = %lor.rhs + %inc16 = add nsw i32 undef, 1 + store i32 %inc16, i32* %top_idx, align 4 + br label %while.cond + +if.end46: ; preds = %lor.rhs, %if.end + br label %while.cond49 + +while.cond49: ; preds = %while.cond49, %if.end46 + %top_idx.0.load139 = load i32, i32* %top_idx, align 4 + %idxprom97 = sext i32 %top_idx.0.load139 to i64 + br label %while.cond49 +} + +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[SW_EPILOG:%.*]] +; CHECK: sw.epilog: +; CHECK-NEXT: br i1 undef, label [[WHILE_COND85:%.*]], label [[IF_END105:%.*]] +; CHECK: while.cond85: +; CHECK-NEXT: [[__M_0:%.*]] = phi double* [ undef, [[SW_EPILOG]] ], [ [[V1:%.*]], [[IF_END98:%.*]] ] +; CHECK-NEXT: br i1 undef, label [[IF_END105]], label [[IF_END98]] +; CHECK: if.end98: +; CHECK-NEXT: [[V1]] = load double*, double** undef, align 8 +; CHECK-NEXT: br label [[WHILE_COND85]] +; CHECK: if.end105: +; CHECK-NEXT: [[__M_1:%.*]] = phi double* [ [[__M_0]], [[WHILE_COND85]] ], [ undef, [[SW_EPILOG]] ] +; CHECK-NEXT: unreachable +; +entry: + %__m = alloca double*, align 8 + br label %sw.epilog + +sw.epilog: ; preds = %entry + br i1 undef, label %while.cond85, label %if.end105 + +while.cond85: ; preds = %if.end98, %sw.epilog + br i1 undef, label %if.end105, label %if.end98 + +if.end98: ; preds = %while.cond85 + %v1 = load double*, double** undef, align 8 + store double* %v1, double** %__m, align 8 + br label %while.cond85 + +if.end105: ; preds = %while.cond85, %sw.epilog + %tmp1 = load double*, double** %__m, align 8 + unreachable +}