Index: llvm/include/llvm/Support/GenericDomTreeConstruction.h =================================================================== --- llvm/include/llvm/Support/GenericDomTreeConstruction.h +++ llvm/include/llvm/Support/GenericDomTreeConstruction.h @@ -158,9 +158,13 @@ // If IsReverse is set to true, the DFS walk will be performed backwards // relative to IsPostDom -- using reverse edges for dominators and forward // edges for postdominators. + // + // If SuccOrder is specified then in this order the DFS traverses the children + // otherwise the order is implied by the results of getChildren(). template unsigned runDFS(NodePtr V, unsigned LastNum, DescendCondition Condition, - unsigned AttachToNum) { + unsigned AttachToNum, + const DenseMap *SuccOrder = nullptr) { assert(V); SmallVector WorkList = {V}; if (NodeToInfo.count(V) != 0) NodeToInfo[V].Parent = AttachToNum; @@ -176,7 +180,14 @@ NumToNode.push_back(BB); constexpr bool Direction = IsReverse != IsPostDom; // XOR. - for (const NodePtr Succ : getChildren(BB, BatchUpdates)) { + auto Successors = getChildren(BB, BatchUpdates); + if (SuccOrder && Successors.size() > 1) + llvm::sort( + Successors.begin(), Successors.end(), [=](NodePtr A, NodePtr B) { + return SuccOrder->find(A)->second < SuccOrder->find(B)->second; + }); + + for (const NodePtr Succ : Successors) { const auto SIT = NodeToInfo.find(Succ); // Don't visit nodes more than once but remember to collect // ReverseChildren. @@ -372,6 +383,34 @@ // nodes. if (Total + 1 != Num) { HasNonTrivialRoots = true; + + // SuccOrder is the order of blocks in the function. It is needed to make + // the calculation of the FurthestAway node and the whole PostDomTree + // immune to swap successors transformation (e.g. canonicalizing branch + // predicates). SuccOrder is initialized lazily only for successors of + // reverse unreachable nodes. + Optional> SuccOrder; + auto InitSuccOrderOnce = [&]() { + SuccOrder = DenseMap(); + for (const auto Node : nodes(DT.Parent)) + if (SNCA.NodeToInfo.count(Node) == 0) + for (const auto Succ : getChildren(Node, SNCA.BatchUpdates)) + SuccOrder->try_emplace(Succ, 0); + + // Add mapping for all entries of SuccOrder. + unsigned NodeNum = 0; + for (const auto Node : nodes(DT.Parent)) { + ++NodeNum; + auto Order = SuccOrder->find(Node); + if (Order != SuccOrder->end()) { + assert(Order->second == 0); + Order->second = NodeNum; + LLVM_DEBUG(dbgs() << "\t\t\tSuccOrder " << NodeNum << ": " + << Node->getName() << "\n"); + } + } + }; + // Make another DFS pass over all other nodes to find the // reverse-unreachable blocks, and find the furthest paths we'll be able // to make. @@ -396,7 +435,12 @@ // expensive and does not always lead to a minimal set of roots. LLVM_DEBUG(dbgs() << "\t\t\tRunning forward DFS\n"); - const unsigned NewNum = SNCA.runDFS(I, Num, AlwaysDescend, Num); + if (!SuccOrder) + InitSuccOrderOnce(); + assert(SuccOrder); + + const unsigned NewNum = + SNCA.runDFS(I, Num, AlwaysDescend, Num, &*SuccOrder); const NodePtr FurthestAway = SNCA.NumToNode[NewNum]; LLVM_DEBUG(dbgs() << "\t\t\tFound a new furthest away node " << "(non-trivial root): " Index: llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/InstCombine/infinite-loop-postdom.ll @@ -0,0 +1,222 @@ +; RUN: opt %s -disable-output -branch-prob -instcombine -block-freq -verify-dom-info +; RUN: opt %s -postdomtree -analyze | FileCheck --check-prefixes=CHECK-POSTDOM %s +; RUN: opt %s -passes='print' 2>&1 | FileCheck --check-prefixes=CHECK-POSTDOM %s + +; Demonstrate that Predicate Canonicalization (InstCombine) does not invalidate PostDomTree +; if the basic block is post-dom unreachable. + +define void @test1(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp uge i24 %a, %b + br i1 %f, label %B1, label %B2 + +B1: + %x = add i24 %a, %b + br label %B2 + +B2: + br label %LOOP +} + +; The same as @test1 except the LOOP condition canonicalized (as by instcombine). +define void @test1-canonicalized(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f.not = icmp ult i24 %a, %b + br i1 %f.not, label %B2, label %B1 + +B1: + %x = add i24 %a, %b + br label %B2 + +B2: + br label %LOOP +} + +; The same as @test1 but different order of B1 and B2 in the function. +; The different order makes PostDomTree different in presense of postdom +; unreachable blocks. +define void @test2(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f = icmp uge i24 %a, %b + br i1 %f, label %B1, label %B2 + +B2: + br label %LOOP + +B1: + %x = add i24 %a, %b + br label %B2 +} + +; The same as @test2 except the LOOP condition canonicalized (as by instcombine). +define void @test2-canonicalized(i24 %a, i24 %b) { +entry: + br label %LOOP + +LOOP: + %f.not = icmp ult i24 %a, %b + br i1 %f.not, label %B2, label %B1 + +B2: + br label %LOOP + +B1: + %x = add i24 %a, %b + br label %B2 +} + +; Two reverse unreachable subgraphs with RU1* and RU2* basic blocks respectively. +define void @test3(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f = icmp uge i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f, label %RU1_B1, label %RU1_B2 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2 = icmp uge i24 %a, %b + br i1 %f2, label %RU2_B1, label %RU2_B2 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; The same as @test3 except the icmp conditions are canonicalized (as by instcombine). +define void @test3-canonicalized(i24 %a, i24 %b, i32 %flag) { +entry: + switch i32 %flag, label %EXIT [ + i32 1, label %RU1 + i32 2, label %RU2 + i32 3, label %RU2_B1 + ] + +RU1: + %f.not = icmp ult i24 %a, %b + br label %RU1_LOOP + +RU1_LOOP: + br i1 %f.not, label %RU1_B2, label %RU1_B1 + +RU1_B1: + %x = add i24 %a, %b + br label %RU1_B2 + +RU1_B2: + br label %RU1_LOOP + +RU2: + %f2.not = icmp ult i24 %a, %b + br i1 %f2.not, label %RU2_B2, label %RU2_B1 + +RU2_B1: + br label %RU2_B2 + +RU2_B2: + br label %RU2_B1 + +EXIT: + ret void +} + +; PostDomTrees of @test1(), @test2() and @test3() are different. +; PostDomTrees of @testX() and @testX-canonicalize() are the same. + +; CHECK-POSTDOM-LABEL: test1 +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B1 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [4] %B2 +; CHECK-POSTDOM-NEXT: Roots: %B1 + +; CHECK-POSTDOM-LABEL: test1-canonicalized +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B1 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [4] %B2 +; CHECK-POSTDOM-NEXT: Roots: %B1 + +; CHECK-POSTDOM-LABEL: test2 +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B2 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [3] %B1 +; CHECK-POSTDOM-NEXT: Roots: %B2 + +; CHECK-POSTDOM-LABEL: test2-canonicalized +; CHECK-POSTDOM-NEXT: =============================-------------------------------- +; CHECK-POSTDOM-NEXT: Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %B2 +; CHECK-POSTDOM-NEXT: [3] %LOOP +; CHECK-POSTDOM-NEXT: [4] %entry +; CHECK-POSTDOM-NEXT: [3] %B1 +; CHECK-POSTDOM-NEXT: Roots: %B2 + +; CHECK-POSTDOM-LABEL: test3 +; CHECK-POSTDOM-NEXT:=============================-------------------------------- +; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %EXIT +; CHECK-POSTDOM-NEXT: [2] %entry +; CHECK-POSTDOM-NEXT: [2] %RU1_B1 +; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP +; CHECK-POSTDOM-NEXT: [4] %RU1 +; CHECK-POSTDOM-NEXT: [4] %RU1_B2 +; CHECK-POSTDOM-NEXT: [2] %RU2_B1 +; CHECK-POSTDOM-NEXT: [3] %RU2 +; CHECK-POSTDOM-NEXT: [3] %RU2_B2 +; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1 + +; CHECK-POSTDOM-LABEL: test3-canonicalized +; CHECK-POSTDOM-NEXT:=============================-------------------------------- +; CHECK-POSTDOM-NEXT:Inorder PostDominator Tree: DFSNumbers invalid: 0 slow queries. +; CHECK-POSTDOM-NEXT: [1] <> +; CHECK-POSTDOM-NEXT: [2] %EXIT +; CHECK-POSTDOM-NEXT: [2] %entry +; CHECK-POSTDOM-NEXT: [2] %RU1_B1 +; CHECK-POSTDOM-NEXT: [3] %RU1_LOOP +; CHECK-POSTDOM-NEXT: [4] %RU1 +; CHECK-POSTDOM-NEXT: [4] %RU1_B2 +; CHECK-POSTDOM-NEXT: [2] %RU2_B1 +; CHECK-POSTDOM-NEXT: [3] %RU2 +; CHECK-POSTDOM-NEXT: [3] %RU2_B2 +; CHECK-POSTDOM-NEXT:Roots: %EXIT %RU1_B1 %RU2_B1