diff --git a/llvm/include/llvm/Support/GenericDomTree.h b/llvm/include/llvm/Support/GenericDomTree.h --- a/llvm/include/llvm/Support/GenericDomTree.h +++ b/llvm/include/llvm/Support/GenericDomTree.h @@ -528,9 +528,9 @@ /// of CFG edges must not delete the CFG nodes before calling this function. /// /// The applyUpdates function can reorder the updates and remove redundant - /// ones internally. The batch updater is also able to detect sequences of - /// zero and exactly one update -- it's optimized to do less work in these - /// cases. + /// ones internally (as long as it is done in a deterministic fashion). The + /// batch updater is also able to detect sequences of zero and exactly one + /// update -- it's optimized to do less work in these cases. /// /// Note that for postdominators it automatically takes care of applying /// updates on reverse edges internally (so there's no need to swap the @@ -538,8 +538,8 @@ /// The type of updates is the same for DomTreeBase and PostDomTreeBase /// with the same template parameter T. /// - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. /// void applyUpdates(ArrayRef Updates) { GraphDiff PreViewCFG( @@ -547,9 +547,9 @@ DomTreeBuilder::ApplyUpdates(*this, PreViewCFG, nullptr); } - /// \param Updates An unordered sequence of updates to perform. The current - /// CFG and the reverse of these updates provides the pre-view of the CFG. - /// \param PostViewUpdates An unordered sequence of update to perform in order + /// \param Updates An ordered sequence of updates to perform. The current CFG + /// and the reverse of these updates provides the pre-view of the CFG. + /// \param PostViewUpdates An ordered sequence of update to perform in order /// to obtain a post-view of the CFG. The DT will be updated assuming the /// obtained PostViewCFG is the desired end state. void applyUpdates(ArrayRef Updates, diff --git a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp --- a/llvm/lib/CodeGen/IndirectBrExpandPass.cpp +++ b/llvm/lib/CodeGen/IndirectBrExpandPass.cpp @@ -260,10 +260,12 @@ if (DTU) { // If there were multiple indirectbr's, they may have common successors, // but in the dominator tree, we only track unique edges. - SmallPtrSet UniqueSuccessors(BBs.begin(), BBs.end()); - Updates.reserve(Updates.size() + UniqueSuccessors.size()); - for (BasicBlock *BB : UniqueSuccessors) - Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + SmallPtrSet UniqueSuccessors; + Updates.reserve(Updates.size() + BBs.size()); + for (BasicBlock *BB : BBs) { + if (UniqueSuccessors.insert(BB).second) + Updates.push_back({DominatorTree::Insert, SwitchBB, BB}); + } DTU->applyUpdates(Updates); } diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -235,22 +235,26 @@ // These dominator edges will be redirected from Pred. std::vector Updates; if (DTU) { - SmallPtrSet SuccsOfBB(succ_begin(BB), succ_end(BB)); + // To avoid processing the same predecessor more than once. + SmallPtrSet SeenSuccs; SmallPtrSet SuccsOfPredBB(succ_begin(PredBB), succ_end(PredBB)); - Updates.reserve(Updates.size() + 2 * SuccsOfBB.size() + 1); + Updates.reserve(Updates.size() + 2 * succ_size(BB) + 1); // Add insert edges first. Experimentally, for the particular case of two // blocks that can be merged, with a single successor and single predecessor // respectively, it is beneficial to have all insert updates first. Deleting // edges first may lead to unreachable blocks, followed by inserting edges // making the blocks reachable again. Such DT updates lead to high compile // times. We add inserts before deletes here to reduce compile time. - for (BasicBlock *SuccOfBB : SuccsOfBB) + for (BasicBlock *SuccOfBB : successors(BB)) // This successor of BB may already be a PredBB's successor. if (!SuccsOfPredBB.contains(SuccOfBB)) - Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); - for (BasicBlock *SuccOfBB : SuccsOfBB) - Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Insert, PredBB, SuccOfBB}); + SeenSuccs.clear(); + for (BasicBlock *SuccOfBB : successors(BB)) + if (SeenSuccs.insert(SuccOfBB).second) + Updates.push_back({DominatorTree::Delete, BB, SuccOfBB}); Updates.push_back({DominatorTree::Delete, PredBB, BB}); } @@ -804,14 +808,14 @@ if (DTU) { SmallVector Updates; // Old dominates New. New node dominates all other nodes dominated by Old. - SmallPtrSet UniqueSuccessorsOfOld(succ_begin(New), - succ_end(New)); + SmallPtrSet UniqueSuccessorsOfOld; Updates.push_back({DominatorTree::Insert, Old, New}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfOld.size()); - for (BasicBlock *UniqueSuccessorOfOld : UniqueSuccessorsOfOld) { - Updates.push_back({DominatorTree::Insert, New, UniqueSuccessorOfOld}); - Updates.push_back({DominatorTree::Delete, Old, UniqueSuccessorOfOld}); - } + Updates.reserve(Updates.size() + 2 * succ_size(New)); + for (BasicBlock *SuccessorOfOld : successors(New)) + if (UniqueSuccessorsOfOld.insert(SuccessorOfOld).second) { + Updates.push_back({DominatorTree::Insert, New, SuccessorOfOld}); + Updates.push_back({DominatorTree::Delete, Old, SuccessorOfOld}); + } DTU->applyUpdates(Updates); } else if (DT) @@ -870,14 +874,14 @@ SmallVector DTUpdates; // New dominates Old. The predecessor nodes of the Old node dominate // New node. - SmallPtrSet UniquePredecessorsOfOld(pred_begin(New), - pred_end(New)); + SmallPtrSet UniquePredecessorsOfOld; DTUpdates.push_back({DominatorTree::Insert, New, Old}); - DTUpdates.reserve(DTUpdates.size() + 2 * UniquePredecessorsOfOld.size()); - for (BasicBlock *UniquePredecessorOfOld : UniquePredecessorsOfOld) { - DTUpdates.push_back({DominatorTree::Insert, UniquePredecessorOfOld, New}); - DTUpdates.push_back({DominatorTree::Delete, UniquePredecessorOfOld, Old}); - } + DTUpdates.reserve(DTUpdates.size() + 2 * pred_size(New)); + for (BasicBlock *PredecessorOfOld : predecessors(New)) + if (UniquePredecessorsOfOld.insert(PredecessorOfOld).second) { + DTUpdates.push_back({DominatorTree::Insert, PredecessorOfOld, New}); + DTUpdates.push_back({DominatorTree::Delete, PredecessorOfOld, Old}); + } DTU->applyUpdates(DTUpdates); @@ -910,13 +914,14 @@ } else { // Split block expects NewBB to have a non-empty set of predecessors. SmallVector Updates; - SmallPtrSet UniquePreds(Preds.begin(), Preds.end()); + SmallPtrSet UniquePreds; Updates.push_back({DominatorTree::Insert, NewBB, OldBB}); - Updates.reserve(Updates.size() + 2 * UniquePreds.size()); - for (auto *UniquePred : UniquePreds) { - Updates.push_back({DominatorTree::Insert, UniquePred, NewBB}); - Updates.push_back({DominatorTree::Delete, UniquePred, OldBB}); - } + Updates.reserve(Updates.size() + 2 * Preds.size()); + for (auto *Pred : Preds) + if (UniquePreds.insert(Pred).second) { + Updates.push_back({DominatorTree::Insert, Pred, NewBB}); + Updates.push_back({DominatorTree::Delete, Pred, OldBB}); + } DTU->applyUpdates(Updates); } } else if (DT) { @@ -1376,14 +1381,14 @@ BasicBlock *Head = SplitBefore->getParent(); BasicBlock *Tail = Head->splitBasicBlock(SplitBefore->getIterator()); if (DTU) { - SmallPtrSet UniqueSuccessorsOfHead(succ_begin(Tail), - succ_end(Tail)); + SmallPtrSet UniqueSuccessorsOfHead; Updates.push_back({DominatorTree::Insert, Head, Tail}); - Updates.reserve(Updates.size() + 2 * UniqueSuccessorsOfHead.size()); - for (BasicBlock *UniqueSuccessorOfHead : UniqueSuccessorsOfHead) { - Updates.push_back({DominatorTree::Insert, Tail, UniqueSuccessorOfHead}); - Updates.push_back({DominatorTree::Delete, Head, UniqueSuccessorOfHead}); - } + Updates.reserve(Updates.size() + 2 * succ_size(Tail)); + for (BasicBlock *SuccessorOfHead : successors(Tail)) + if (UniqueSuccessorsOfHead.insert(SuccessorOfHead).second) { + Updates.push_back({DominatorTree::Insert, Tail, SuccessorOfHead}); + Updates.push_back({DominatorTree::Delete, Head, SuccessorOfHead}); + } } Instruction *HeadOldTerm = Head->getTerminator(); LLVMContext &C = Head->getContext(); diff --git a/llvm/lib/Transforms/Utils/Local.cpp b/llvm/lib/Transforms/Utils/Local.cpp --- a/llvm/lib/Transforms/Utils/Local.cpp +++ b/llvm/lib/Transforms/Utils/Local.cpp @@ -760,15 +760,18 @@ SmallVector Updates; if (DTU) { - SmallPtrSet PredsOfPredBB(pred_begin(PredBB), - pred_end(PredBB)); - Updates.reserve(Updates.size() + 2 * PredsOfPredBB.size() + 1); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) + // To avoid processing the same predecessor more than once. + SmallPtrSet SeenPreds; + Updates.reserve(Updates.size() + 2 * pred_size(PredBB) + 1); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) // This predecessor of PredBB may already have DestBB as a successor. if (PredOfPredBB != PredBB) - Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); - for (BasicBlock *PredOfPredBB : PredsOfPredBB) - Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Insert, PredOfPredBB, DestBB}); + SeenPreds.clear(); + for (BasicBlock *PredOfPredBB : predecessors(PredBB)) + if (SeenPreds.insert(PredOfPredBB).second) + Updates.push_back({DominatorTree::Delete, PredOfPredBB, PredBB}); Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); } @@ -1096,16 +1099,20 @@ SmallVector Updates; if (DTU) { + // To avoid processing the same predecessor more than once. + SmallPtrSet SeenPreds; // All predecessors of BB will be moved to Succ. - SmallPtrSet PredsOfBB(pred_begin(BB), pred_end(BB)); SmallPtrSet PredsOfSucc(pred_begin(Succ), pred_end(Succ)); - Updates.reserve(Updates.size() + 2 * PredsOfBB.size() + 1); - for (auto *PredOfBB : PredsOfBB) + Updates.reserve(Updates.size() + 2 * pred_size(BB) + 1); + for (auto *PredOfBB : predecessors(BB)) // This predecessor of BB may already have Succ as a successor. if (!PredsOfSucc.contains(PredOfBB)) - Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); - for (auto *PredOfBB : PredsOfBB) - Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Insert, PredOfBB, Succ}); + SeenPreds.clear(); + for (auto *PredOfBB : predecessors(BB)) + if (SeenPreds.insert(PredOfBB).second) + Updates.push_back({DominatorTree::Delete, PredOfBB, BB}); Updates.push_back({DominatorTree::Delete, BB, Succ}); } diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -3936,7 +3936,7 @@ BasicBlock *KeepEdge1 = TrueBB; BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr; - SmallPtrSet RemovedSuccessors; + SmallSetVector RemovedSuccessors; // Then remove the rest. for (BasicBlock *Succ : successors(OldTerm)) { @@ -4947,10 +4947,14 @@ // Gather dead cases. SmallVector DeadCases; SmallDenseMap NumPerSuccessorCases; + SmallVector UniqueSuccessors; for (auto &Case : SI->cases()) { auto *Successor = Case.getCaseSuccessor(); - if (DTU) + if (DTU) { + if (!NumPerSuccessorCases.count(Successor)) + UniqueSuccessors.push_back(Successor); ++NumPerSuccessorCases[Successor]; + } const APInt &CaseVal = Case.getCaseValue()->getValue(); if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) || (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) { @@ -4993,9 +4997,9 @@ if (DTU) { std::vector Updates; - for (const std::pair &I : NumPerSuccessorCases) - if (I.second == 0) - Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first}); + for (auto *Successor : UniqueSuccessors) + if (NumPerSuccessorCases[Successor] == 0) + Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor}); DTU->applyUpdates(Updates); } @@ -6060,15 +6064,13 @@ if (Succ == SI->getDefaultDest()) continue; Succ->removePredecessor(BB); - RemovedSuccessors.insert(Succ); + if (DTU && RemovedSuccessors.insert(Succ).second) + Updates.push_back({DominatorTree::Delete, BB, Succ}); } SI->eraseFromParent(); - if (DTU) { - for (BasicBlock *RemovedSuccessor : RemovedSuccessors) - Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor}); + if (DTU) DTU->applyUpdates(Updates); - } ++NumLookupTables; if (NeedMask) @@ -6235,7 +6237,7 @@ // Eliminate redundant destinations. SmallPtrSet Succs; - SmallPtrSet RemovedSuccs; + SmallSetVector RemovedSuccs; for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { BasicBlock *Dest = IBI->getDestination(i); if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) { @@ -6325,8 +6327,8 @@ // We've found an identical block. Update our predecessors to take that // path instead and make ourselves dead. - SmallPtrSet Preds(pred_begin(BB), pred_end(BB)); - for (BasicBlock *Pred : Preds) { + SmallSetVector UniquePreds(pred_begin(BB), pred_end(BB)); + for (BasicBlock *Pred : UniquePreds) { InvokeInst *II = cast(Pred->getTerminator()); assert(II->getNormalDest() != BB && II->getUnwindDest() == BB && "unexpected successor"); @@ -6343,8 +6345,8 @@ if (isa(Inst)) Inst.eraseFromParent(); - SmallPtrSet Succs(succ_begin(BB), succ_end(BB)); - for (BasicBlock *Succ : Succs) { + SmallSetVector UniqueSuccs(succ_begin(BB), succ_end(BB)); + for (BasicBlock *Succ : UniqueSuccs) { Succ->removePredecessor(BB); if (DTU) Updates.push_back({DominatorTree::Delete, BB, Succ}); diff --git a/llvm/test/Transforms/JumpThreading/domtree-updates.ll b/llvm/test/Transforms/JumpThreading/domtree-updates.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/JumpThreading/domtree-updates.ll @@ -0,0 +1,111 @@ +; RUN: opt < %s -disable-output -passes='jump-threading,print' 2>&1 | FileCheck %s + +; REQUIRES: asserts + +; The idea behind this test case is to verify that the dominator tree is +; updated in a deterministic way. Optimizations, at least EarlyCSE, are +; iterating the vectors that hold child nodes in the DominatorTree. Thus, the +; end result might differ depending on the order in which nodes are inserted +; in the dominator tree. Unfortunately this test case is quite large, but it +; happened to trigger a non-determinism quite often when being executed +; multipe times (it was possible to see varying results when running the test +; less that 10 times in a row). +; The actual problem was tracked down to llvm::MergeBasicBlockIntoOnlyPred, so +; the important property of the test is probably that it triggers a call to +; that function, and that the PredsOfPredBB set that is used to populate +; Updates for the DomTreeUpdater is populated with more than one entry. + +; CHECK: Inorder Dominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-NEXT: [1] %entry {4294967295,4294967295} [0] +; CHECK-NEXT: [2] %for.cond1 {4294967295,4294967295} [1] +; CHECK-NEXT: [3] %for.inc19 {4294967295,4294967295} [2] +; CHECK-NEXT: [3] %if.then {4294967295,4294967295} [2] +; CHECK-NEXT: [4] %for.cond5.preheader {4294967295,4294967295} [3] +; CHECK-NEXT: [5] %cleanup {4294967295,4294967295} [4] +; CHECK-NEXT: [6] %cleanup16 {4294967295,4294967295} [5] +; CHECK-NEXT: [7] %unreachable {4294967295,4294967295} [6] +; CHECK-NEXT: [7] %for.end21 {4294967295,4294967295} [6] +; CHECK-NEXT: [5] %for.body7 {4294967295,4294967295} [4] +; CHECK-NEXT: [6] %for.inc {4294967295,4294967295} [5] +; CHECK-NEXT: [5] %return {4294967295,4294967295} [4] +; CHECK-NEXT: [3] %cleanup16.thread {4294967295,4294967295} [2] +; CHECK-NEXT: [2] %infinite.loop {4294967295,4294967295} [1] +; CHECK-NEXT: Roots: %entry + + +@a = dso_local local_unnamed_addr global i16 0, align 1 + +; Function Attrs: nounwind +define dso_local i16 @g(i16 %a0, i16 %a1, i16 %a2, i16 %a3) local_unnamed_addr { +entry: + %tobool.not = icmp eq i16 %a0, 0 + br i1 %tobool.not, label %for.cond1, label %infinite.loop + +infinite.loop: ; preds = %infinite.loop, %entry + br label %infinite.loop + +for.cond1: ; preds = %for.inc19, %entry + %retval.0 = phi i16 [ %retval.3, %for.inc19 ], [ undef, %entry ] + %i.0 = phi i16 [ %i.3, %for.inc19 ], [ undef, %entry ] + %tobool2.not = icmp eq i16 %a1, 0 + br i1 %tobool2.not, label %if.end15, label %if.then + +if.then: ; preds = %for.cond1 + %tobool3.not = icmp eq i16 %a2, 0 + br i1 %tobool3.not, label %if.end15, label %for.cond5.preheader + +for.cond5.preheader: ; preds = %if.then + %tobool8.not = icmp eq i16 %a3, 0 + %tobool6.not31 = icmp eq i16 %i.0, 0 + br i1 %tobool6.not31, label %for.end10, label %for.body7 + +for.body7: ; preds = %for.inc, %for.cond5.preheader + %i.132 = phi i16 [ %inc, %for.inc ], [ %i.0, %for.cond5.preheader ] + br i1 %tobool8.not, label %for.inc, label %cleanup + +for.inc: ; preds = %for.body7 + %inc = add i16 %i.132, 1 + %tobool6.not = icmp eq i16 %inc, 0 + br i1 %tobool6.not, label %for.end10, label %for.body7 + +for.end10: ; preds = %for.inc, %for.cond5.preheader + %i.1.lcssa = phi i16 [ %i.0, %for.cond5.preheader ], [ 0, %for.inc ] + %.26 = select i1 %tobool8.not, i32 0, i32 4 + br label %cleanup + +cleanup: ; preds = %for.end10, %for.body7 + %i.128 = phi i16 [ %i.1.lcssa, %for.end10 ], [ %i.0, %for.body7 ] + %retval.1 = phi i16 [ %retval.0, %for.end10 ], [ 1, %for.body7 ] + %cond = phi i1 [ %tobool8.not, %for.end10 ], [ false, %for.body7 ] + %cleanup.dest.slot.0 = phi i32 [ %.26, %for.end10 ], [ 1, %for.body7 ] + br i1 %cond, label %if.end15, label %cleanup16 + +if.end15: ; preds = %cleanup, %if.then, %for.cond1 + %retval.2 = phi i16 [ %retval.1, %cleanup ], [ %retval.0, %if.then ], [ %retval.0, %for.cond1 ] + %i.2 = phi i16 [ %i.128, %cleanup ], [ %i.0, %if.then ], [ %i.0, %for.cond1 ] + store i16 0, i16* @a, align 1 + br label %cleanup16 + +cleanup16: ; preds = %if.end15, %cleanup + %retval.3 = phi i16 [ %retval.2, %if.end15 ], [ %retval.1, %cleanup ] + %i.3 = phi i16 [ %i.2, %if.end15 ], [ %i.128, %cleanup ] + %cleanup.dest.slot.1 = phi i32 [ 0, %if.end15 ], [ %cleanup.dest.slot.0, %cleanup ] + switch i32 %cleanup.dest.slot.1, label %unreachable [ + i32 0, label %for.inc19 + i32 1, label %return + i32 4, label %for.end21 + ] + +for.inc19: ; preds = %cleanup16 + br label %for.cond1 + +for.end21: ; preds = %cleanup16 + br label %return + +return: ; preds = %for.end21, %cleanup16 + %retval.4 = phi i16 [ 17, %for.end21 ], [ %retval.3, %cleanup16 ] + ret i16 %retval.4 + +unreachable: ; preds = %cleanup16 + unreachable +}