diff --git a/llvm/include/llvm/Analysis/MergeSets.h b/llvm/include/llvm/Analysis/MergeSets.h
new file mode 100644
--- /dev/null
+++ b/llvm/include/llvm/Analysis/MergeSets.h
@@ -0,0 +1,89 @@
+//===- MergeSets.h - Calculate MergeSets ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Build the merge sets for a given dominator tree. The merge set of a node N
+/// is the set of nodes that may need a PHI node for a definition in N. Based
+/// on the algorithm from
+///
+/// Dibyendu Das and U. Ramakrishna. 2005. A practical and fast iterative
+/// algorithm for φ-function computation using DJ graphs.
+/// ACM Trans. Program. Lang. Syst. 27, 3 (May 2005), 426-440.
+///
+/// It has been modified to not explicitly use the DJ graph data structure.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_MERGESETS_H
+#define LLVM_ANALYSIS_MERGESETS_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFGDiff.h"
+#include "llvm/IR/Dominators.h"
+
+#include <unordered_set>
+#include <utility>
+
+namespace llvm {
+
+class MergeSets {
+  DominatorTree &DT;
+  DenseMap<BasicBlock *, unsigned> &BBNumbers;
+  size_t NumBBs;
+
+  struct PairHasher {
+    std::size_t operator()(const std::pair<unsigned, unsigned> &Key) const {
+      return Key.first ^
+             (Key.second + (Key.first << 6) + (Key.first >> 2) + 0x9e3779b9);
+    }
+  };
+
+  /// J edges (as pairs of block numbers) already processed in this round.
+  std::unordered_set<std::pair<unsigned, unsigned>, PairHasher> Visited;
+
+  /// Merge set for each block, indexed by block number.
+  SmallVector<BitVector, 0> Sets;
+
+  /// Reverse mapping from block number to block.
+  DenseMap<unsigned, BasicBlock *> NumToBB;
+
+  BitVector &getSet(DomTreeNode *N) { return Sets[getNum(N)]; }
+
+  BitVector &getSet(BasicBlock *BB) { return Sets[getNum(BB)]; }
+
+  BitVector &getSet(unsigned i) { return Sets[i]; }
+
+  const BitVector &getSet(unsigned i) const { return Sets[i]; }
+
+  unsigned getNum(BasicBlock *BB) const { return BBNumbers.find(BB)->second; }
+  unsigned getNum(DomTreeNode *N) const { return getNum(N->getBlock()); }
+
+  bool isInconsistentJ(DomTreeNode *Src, DomTreeNode *Dst) const;
+
+  void dumpSet(DomTreeNode *N);
+
+public:
+  MergeSets(DominatorTree &DT, DenseMap<BasicBlock *, unsigned> &BBNumbers)
+      : DT(DT), BBNumbers(BBNumbers), NumBBs(BBNumbers.size()),
+        Sets(NumBBs, BitVector(NumBBs)) {
+    // Reverse mapping from number to BB.
+    NumToBB.reserve(BBNumbers.size());
+    for (auto &P : BBNumbers)
+      NumToBB[P.second] = P.first;
+  }
+
+  /// Returns the merge set for the nodes in \p DefBBs.
+  SmallVector<BasicBlock *, 32>
+  get(const SmallPtrSetImpl<BasicBlock *> &DefBBs);
+
+  /// Compute merge sets for the CFG rooted at \p Root using TDMSC-I from the
+  /// paper.
+  void build(BasicBlock *Root);
+
+  /// Compute merge sets for the CFG rooted at \p Root using TDMSC-II from the
+  /// paper.
+  void build2(BasicBlock *Root);
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_MERGESETS_H
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -65,6 +65,7 @@
   MemorySSAUpdater.cpp
+  MergeSets.cpp
   ModuleDebugInfoPrinter.cpp
   ModuleSummaryAnalysis.cpp
   MustExecute.cpp
   ObjCARCAliasAnalysis.cpp
   ObjCARCAnalysisUtils.cpp
diff --git a/llvm/lib/Analysis/MergeSets.cpp b/llvm/lib/Analysis/MergeSets.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Analysis/MergeSets.cpp
@@ -0,0 +1,208 @@
+//===- MergeSets.cpp - Compute MergeSets ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Compute the MergeSets for a dominator tree.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/MergeSets.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+namespace llvm {
+
+#define DEBUG_TYPE "mergesets"
+
+STATISTIC(Num1Round, "Number of times merge sets were valid after one round");
+STATISTIC(Num2Rounds, "Number of times merge sets were valid after two rounds");
+STATISTIC(Num3Rounds,
+          "Number of times merge sets were valid after three or more rounds");
+
+// Returns true if the J edge Src->Dst has already been visited in this round
+// but the merge set of Src does not yet contain the merge set of Dst.
+bool MergeSets::isInconsistentJ(DomTreeNode *Src, DomTreeNode *Dst) const {
+  if (!Src || DT.properlyDominates(Src, Dst))
+    return false;
+
+  unsigned SrcNum = getNum(Src);
+  unsigned DstNum = getNum(Dst);
+  if (Visited.find({SrcNum, DstNum}) == Visited.end())
+    return false;
+
+  BitVector Both = getSet(SrcNum);
+  unsigned SrcCount = Both.count();
+  Both |= getSet(DstNum);
+  if (Both.count() == SrcCount)
+    return false;
+
+  return true;
+}
+
+void MergeSets::build(BasicBlock *Root) {
+  DT.updateDFSNumbers();
+  PredIteratorCache PredCache;
+
+  bool Done = false;
+  unsigned Round = 0;
+  while (!Done) {
+    Round++;
+    Done = true;
+    Visited.clear();
+
+    // Visit the dominator tree in breadth-first order.
+    std::queue<DomTreeNode *> WorkQueue;
+    WorkQueue.push(DT.getNode(Root));
+
+    while (!WorkQueue.empty()) {
+      DomTreeNode *N = WorkQueue.front();
+      WorkQueue.pop();
+      for (auto *C : N->getChildren())
+        WorkQueue.push(C);
+
+      for (BasicBlock *P : PredCache.get(N->getBlock())) {
+        auto *PDom = DT.getNode(P);
+        // Skip unreachable predecessors; only J edges (edges whose source does
+        // not properly dominate N) are interesting.
+        if (!PDom || DT.properlyDominates(PDom, N))
+          continue;
+
+        unsigned PNum = getNum(P);
+        unsigned NNum = getNum(N);
+        if (!Visited.insert({PNum, NNum}).second)
+          continue;
+
+        // Walk up the dominator tree from the J edge's source, adding N's
+        // merge set plus N itself to every node at a level >= N's level.
+        DomTreeNode *Tmp = PDom;
+        DomTreeNode *LNode = nullptr;
+        const auto &S2 = getSet(N);
+        while (Tmp->getLevel() >= N->getLevel()) {
+          auto &TmpS = getSet(Tmp);
+          TmpS |= S2;
+          TmpS.set(NNum);
+          LNode = Tmp;
+          Tmp = Tmp->getIDom();
+        }
+
+        // If any incoming edge is inconsistent, we need to do another round
+        // of the algorithm.
+        if (any_of(PredCache.get(LNode->getBlock()),
+                   [&LNode, this](BasicBlock *Src) {
+                     return this->isInconsistentJ(this->DT.getNode(Src),
+                                                  LNode);
+                   })) {
+          Done = false;
+          break;
+        }
+      }
+    }
+  }
+
+  if (Round == 1)
+    Num1Round++;
+  else if (Round == 2)
+    Num2Rounds++;
+  else
+    Num3Rounds++;
+}
+
+void MergeSets::build2(BasicBlock *Root) {
+  DT.updateDFSNumbers();
+
+  PredIteratorCache PredCache;
+
+  bool Done = false;
+  unsigned Round = 0;
+  while (!Done) {
+    Round++;
+    Done = true;
+    Visited.clear();
+
+    // Visit the dominator tree in breadth-first order.
+    std::queue<DomTreeNode *> WorkQueue;
+    WorkQueue.push(DT.getNode(Root));
+
+    while (!WorkQueue.empty()) {
+      DomTreeNode *N = WorkQueue.front();
+      WorkQueue.pop();
+      for (auto *C : N->getChildren())
+        WorkQueue.push(C);
+
+      for (BasicBlock *P : PredCache.get(N->getBlock())) {
+        auto *PDom = DT.getNode(P);
+        // Skip unreachable predecessors; only J edges are interesting.
+        if (!PDom || DT.properlyDominates(PDom, N))
+          continue;
+
+        unsigned PNum = getNum(P);
+        unsigned NNum = getNum(N);
+        if (!Visited.insert({PNum, NNum}).second)
+          continue;
+
+        // Walk up the dominator tree from the J edge's source, adding N's
+        // merge set plus N itself to every node at a level >= N's level.
+        DomTreeNode *Tmp = PDom;
+        DomTreeNode *LNode = nullptr;
+        const auto &S2 = getSet(N);
+        while (Tmp->getLevel() >= N->getLevel()) {
+          auto &TmpS = getSet(Tmp);
+          TmpS |= S2;
+          TmpS.set(NNum);
+          LNode = Tmp;
+          Tmp = Tmp->getIDom();
+        }
+
+        // Unlike TDMSC-I, repair inconsistent incoming edges of LNode right
+        // away by propagating LNode's merge set up from each such edge's
+        // source.
+        for (BasicBlock *Src : PredCache.get(LNode->getBlock())) {
+          auto *SrcD = DT.getNode(Src);
+          if (!isInconsistentJ(SrcD, LNode))
+            continue;
+
+          DomTreeNode *NN = SrcD;
+          DomTreeNode *LNode1 = nullptr;
+          auto &LNodeSet = getSet(LNode);
+          while (NN->getLevel() >= LNode->getLevel()) {
+            auto &TmpS = getSet(NN);
+            TmpS |= LNodeSet;
+            LNode1 = NN;
+            NN = NN->getIDom();
+          }
+
+          // If any incoming edge is still inconsistent, we need to do another
+          // round of the algorithm.
+          if (any_of(PredCache.get(LNode1->getBlock()),
+                     [&LNode1, this](BasicBlock *Pred) {
+                       return this->isInconsistentJ(this->DT.getNode(Pred),
+                                                    LNode1);
+                     })) {
+            Done = false;
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  if (Round == 1)
+    Num1Round++;
+  else if (Round == 2)
+    Num2Rounds++;
+  else
+    Num3Rounds++;
+}
+
+SmallVector<BasicBlock *, 32>
+MergeSets::get(const SmallPtrSetImpl<BasicBlock *> &DefBBs) {
+  SmallVector<BasicBlock *, 32> Result;
+  BitVector M(BBNumbers.size());
+  for (auto *BB : DefBBs) {
+    assert(DT.isReachableFromEntry(BB) && "Node not reachable.");
+    M |= getSet(BB);
+  }
+
+  for (unsigned I : M.set_bits())
+    Result.push_back(NumToBB[I]);
+  return Result;
+}
+
+void MergeSets::dumpSet(DomTreeNode *N) {
+  SmallPtrSet<BasicBlock *, 1> S;
+  S.insert(N->getBlock());
+  dbgs() << " MS for " << N->getBlock()->getName() << " : ";
+  for (auto *BB : get(S))
+    dbgs() << BB->getName() << " ";
+  dbgs() << "\n";
+}
+
+} // namespace llvm
diff --git a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
--- a/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -25,7 +25,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
-#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/MergeSets.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -44,7 +44,9 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
+#include "llvm/IR/Verifier.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
 #include <algorithm>
 #include <cassert>
@@ -530,13 +532,14 @@
 void PromoteMem2Reg::run() {
   Function &F = *DT.getRoot()->getParent();
-
+  /* if (F.getName() == "gen_pic_list_from_frame_list")*/
"gen_pic_list_from_frame_list")*/ + // F.getParent()->dump(); AllocaDbgDeclares.resize(Allocas.size()); AllocaInfo Info; LargeBlockInfo LBI; - ForwardIDFCalculator IDF(DT); + SmallVector, 32> NeedPHIs; for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { AllocaInst *AI = Allocas[AllocaNum]; @@ -600,35 +603,72 @@ // nodes and see if we can optimize out some work by avoiding insertion of // dead phi nodes. - // Unique the set of defining blocks for efficient lookup. - SmallPtrSet DefBlocks(Info.DefiningBlocks.begin(), - Info.DefiningBlocks.end()); - - // Determine which blocks the value is live in. These are blocks which lead - // to uses. - SmallPtrSet LiveInBlocks; - ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); - - // At this point, we're committed to promoting the alloca using IDF's, and - // the standard SSA construction algorithm. Determine which blocks need phi - // nodes and see if we can optimize out some work by avoiding insertion of - // dead phi nodes. - IDF.setLiveInBlocks(LiveInBlocks); - IDF.setDefiningBlocks(DefBlocks); - SmallVector PHIBlocks; - IDF.calculate(PHIBlocks); - llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { - return BBNumbers.find(A)->second < BBNumbers.find(B)->second; - }); - - unsigned CurrentVersion = 0; - for (BasicBlock *BB : PHIBlocks) - QueuePhiNode(BB, AllocaNum, CurrentVersion); + NeedPHIs.emplace_back(AllocaNum, std::move(Info)); } if (Allocas.empty()) return; // All of the allocas must have been trivial! + if (!NeedPHIs.empty()) { + MergeSets MS(DT, BBNumbers); + MS.build2(&F.getEntryBlock()); + + for (auto &P : NeedPHIs) { + auto &Info = P.second; + unsigned AllocaNum = P.first; + // Unique the set of defining blocks for efficient lookup. + SmallPtrSet DefBlocks; + for (auto *BB : Info.DefiningBlocks) + if (DT.isReachableFromEntry(BB)) + DefBlocks.insert(BB); + + auto AI = Allocas[AllocaNum]; + + // Determine which blocks the value is live in. These are blocks which + // lead to uses. + SmallPtrSet LiveInBlocks; + ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); + + // At this point, we're committed to promoting the alloca using IDF's, and + // the standard SSA construction algorithm. Determine which blocks need + // phi nodes and see if we can optimize out some work by avoiding + // insertion of dead phi nodes. + auto PHIBlocks = MS.get(DefBlocks); + if (PHIBlocks.size() > 1) + llvm::sort(PHIBlocks, [this](BasicBlock *A, BasicBlock *B) { + return BBNumbers.lookup(A) < BBNumbers.lookup(B); + }); + + unsigned CurrentVersion = 0; + unsigned NumAdded = 0; + for (BasicBlock *BB : PHIBlocks) { + if (!LiveInBlocks.count(BB)) + continue; + NumAdded++; + QueuePhiNode(BB, AllocaNum, CurrentVersion); + } +#ifndef NDEBUG + // Verify that we added the same number of PHI nodes using merge sets as + // with the iterated dominance frontier. + ForwardIDFCalculator IDF(DT); + IDF.setLiveInBlocks(LiveInBlocks); + IDF.setDefiningBlocks(DefBlocks); + SmallVector IDFPHIBlocks; + IDF.calculate(IDFPHIBlocks); + + // Make sure both approaches result in the same number of PHIs placed. + assert(NumAdded == IDFPHIBlocks.size() && "Number of blocks mismatched"); + + SmallPtrSet PlacedBlockSet; + PlacedBlockSet.insert(PHIBlocks.begin(), PHIBlocks.end()); + + // Check all PHI blocks computed by IDF are also found using merge sets. 
+      for (auto *BB : IDFPHIBlocks)
+        assert(PlacedBlockSet.count(BB) && "missed a block");
+#endif
+    }
+  }
+
   LBI.clear();
 
   // Set the incoming values for the basic block to be null values for all of
@@ -763,6 +803,10 @@
   }
 
   NewPhiNodes.clear();
+  /* if (verifyFunction(F, &dbgs())) {*/
+  // F.dump();
+  // assert(false);
+  /*}*/
 }
 
 /// Determine which blocks the value is live in.
diff --git a/llvm/test/Transforms/Mem2Reg/merge-sets.ll b/llvm/test/Transforms/Mem2Reg/merge-sets.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/Mem2Reg/merge-sets.ll
@@ -0,0 +1,86 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -mem2reg | FileCheck %s
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[IF_END:%.*]]
+; CHECK:       if.end:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND:%.*]], label [[IF_END46:%.*]]
+; CHECK:       while.cond:
+; CHECK-NEXT:    [[TOP_IDX_0:%.*]] = phi i32 [ undef, [[IF_END]] ], [ [[INC16:%.*]], [[WHILE_BODY:%.*]] ]
+; CHECK-NEXT:    br label [[LOR_RHS:%.*]]
+; CHECK:       lor.rhs:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_BODY]], label [[IF_END46]]
+; CHECK:       while.body:
+; CHECK-NEXT:    [[INC16]] = add nsw i32 undef, 1
+; CHECK-NEXT:    br label [[WHILE_COND]]
+; CHECK:       if.end46:
+; CHECK-NEXT:    [[TOP_IDX_1:%.*]] = phi i32 [ [[TOP_IDX_0]], [[LOR_RHS]] ], [ undef, [[IF_END]] ]
+; CHECK-NEXT:    br label [[WHILE_COND49:%.*]]
+; CHECK:       while.cond49:
+; CHECK-NEXT:    [[IDXPROM97:%.*]] = sext i32 [[TOP_IDX_1]] to i64
+; CHECK-NEXT:    br label [[WHILE_COND49]]
+;
+entry:
+  %top_idx = alloca i32, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %entry
+  br i1 undef, label %while.cond, label %if.end46
+
+while.cond:                                       ; preds = %while.body, %if.end
+  br label %lor.rhs
+
+lor.rhs:                                          ; preds = %while.cond
+  br i1 undef, label %while.body, label %if.end46
+
+while.body:                                       ; preds = %lor.rhs
+  %inc16 = add nsw i32 undef, 1
+  store i32 %inc16, i32* %top_idx, align 4
+  br label %while.cond
+
+if.end46:                                         ; preds = %lor.rhs, %if.end
+  br label %while.cond49
+
+while.cond49:                                     ; preds = %while.cond49, %if.end46
+  %top_idx.0.load139 = load i32, i32* %top_idx, align 4
+  %idxprom97 = sext i32 %top_idx.0.load139 to i64
+  br label %while.cond49
+}
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[SW_EPILOG:%.*]]
+; CHECK:       sw.epilog:
+; CHECK-NEXT:    br i1 undef, label [[WHILE_COND85:%.*]], label [[IF_END105:%.*]]
+; CHECK:       while.cond85:
+; CHECK-NEXT:    [[__M_0:%.*]] = phi double* [ undef, [[SW_EPILOG]] ], [ [[V1:%.*]], [[IF_END98:%.*]] ]
+; CHECK-NEXT:    br i1 undef, label [[IF_END105]], label [[IF_END98]]
+; CHECK:       if.end98:
+; CHECK-NEXT:    [[V1]] = load double*, double** undef, align 8
+; CHECK-NEXT:    br label [[WHILE_COND85]]
+; CHECK:       if.end105:
+; CHECK-NEXT:    [[__M_1:%.*]] = phi double* [ [[__M_0]], [[WHILE_COND85]] ], [ undef, [[SW_EPILOG]] ]
+; CHECK-NEXT:    unreachable
+;
+entry:
+  %__m = alloca double*, align 8
+  br label %sw.epilog
+
+sw.epilog:                                        ; preds = %entry
+  br i1 undef, label %while.cond85, label %if.end105
+
+while.cond85:                                     ; preds = %if.end98, %sw.epilog
+  br i1 undef, label %if.end105, label %if.end98
+
+if.end98:                                         ; preds = %while.cond85
+  %v1 = load double*, double** undef, align 8
+  store double* %v1, double** %__m, align 8
+  br label %while.cond85
+
+if.end105:                                        ; preds = %while.cond85, %sw.epilog
+  %tmp1 = load double*, double** %__m, align 8
+  unreachable
+}
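
Usage note (not part of the patch): the sketch below shows how the new analysis is
meant to be driven, mirroring what PromoteMem2Reg::run() now does. Only MergeSets,
build2() and get() come from the code above; the wrapper function name
placePHIsWithMergeSets, the F/DT parameters, and the block-numbering loop are
illustrative assumptions.

  // Sketch only: assumes an up-to-date DominatorTree for F.
  #include "llvm/Analysis/MergeSets.h"
  #include "llvm/IR/Dominators.h"
  #include "llvm/IR/Function.h"

  static void placePHIsWithMergeSets(llvm::Function &F,
                                     llvm::DominatorTree &DT) {
    using namespace llvm;

    // Number every block; MergeSets sizes its bit vectors from this map, so
    // the numbering must cover all blocks (mem2reg reuses its BBNumbers map).
    DenseMap<BasicBlock *, unsigned> BBNumbers;
    unsigned NextNum = 0;
    for (BasicBlock &BB : F)
      BBNumbers[&BB] = NextNum++;

    // Build the merge sets once per function; build2() runs the TDMSC-II
    // variant, build() the TDMSC-I variant.
    MergeSets MS(DT, BBNumbers);
    MS.build2(&F.getEntryBlock());

    // Per promoted variable: the merge set of its reachable defining blocks is
    // a superset of the PHI placement blocks. get() asserts that every block
    // passed in is reachable from the entry block.
    SmallPtrSet<BasicBlock *, 32> DefBlocks;
    DefBlocks.insert(&F.getEntryBlock()); // stand-in for real defining blocks
    SmallVector<BasicBlock *, 32> PHIBlocks = MS.get(DefBlocks);
    (void)PHIBlocks;
  }

As in the run() changes above, the returned blocks still need to be intersected
with the live-in blocks before queueing PHI nodes, since a merge set can contain
blocks where the value is dead.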