Index: llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ llvm/trunk/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -123,11 +123,62 @@ // exit block. DT.changeImmediateDominator(UnswitchedNode, OldPHNode); - // Blocks reachable from the unswitched block may need to change their IDom - // as well. + // For everything that moves up the dominator tree, we need to examine the + // dominator frontier to see if it additionally should move up the dominator + // tree. This lambda appends the dominator frontier for a node on the + // worklist. + // + // Note that we don't currently use the IDFCalculator here for two reasons: + // 1) It computes dominator tree levels for the entire function on each run + // of 'compute'. While this isn't terrible, given that we expect to update + // relatively small subtrees of the domtree, it isn't necessarily the right + // tradeoff. + // 2) The interface doesn't fit this usage well. It doesn't operate in + // append-only, and builds several sets that we don't need. + // + // FIXME: Neither of these issues are a big deal and could be addressed with + // some amount of refactoring of IDFCalculator. That would allow us to share + // the core logic here (which is solving the same core problem). SmallSetVector Worklist; - for (auto *SuccBB : successors(UnswitchedBB)) - Worklist.insert(SuccBB); + SmallVector DomNodes; + SmallPtrSet DomSet; + auto AppendDomFrontier = [&](DomTreeNode *Node) { + assert(DomNodes.empty() && "Must start with no dominator nodes."); + assert(DomSet.empty() && "Must start with an empty dominator set."); + + // First flatten this subtree into sequence of nodes by doing a pre-order + // walk. + DomNodes.push_back(Node); + // We intentionally re-evaluate the size as each node can add new children. + // Because this is a tree walk, this cannot add any duplicates. + for (int i = 0; i < (int)DomNodes.size(); ++i) + DomNodes.insert(DomNodes.end(), DomNodes[i]->begin(), DomNodes[i]->end()); + + // Now create a set of the basic blocks so we can quickly test for + // dominated successors. We could in theory use the DFS numbers of the + // dominator tree for this, but we want this to remain predictably fast + // even while we mutate the dominator tree in ways that would invalidate + // the DFS numbering. + for (DomTreeNode *InnerN : DomNodes) + DomSet.insert(InnerN->getBlock()); + + // Now re-walk the nodes, appending every successor of every node that isn't + // in the set. Note that we don't append the node itself, even though if it + // is a successor it does not strictly dominate itself and thus it would be + // part of the dominance frontier. The reason we don't append it is that + // the node passed in came *from* the worklist and so it has already been + // processed. + for (DomTreeNode *InnerN : DomNodes) + for (BasicBlock *SuccBB : successors(InnerN->getBlock())) + if (!DomSet.count(SuccBB)) + Worklist.insert(SuccBB); + + DomNodes.clear(); + DomSet.clear(); + }; + + // Append the initial dom frontier nodes. + AppendDomFrontier(UnswitchedNode); // Walk the worklist. We grow the list in the loop and so must recompute size. for (int i = 0; i < (int)Worklist.size(); ++i) { @@ -136,20 +187,17 @@ DomTreeNode *Node = DT[BB]; assert(!DomChain.count(Node) && "Cannot be dominated by a block you can reach!"); - // If this block doesn't have an immediate dominator somewhere in the chain - // we hoisted over, then its position in the domtree hasn't changed. Either - // it is above the region hoisted and still valid, or it is below the - // hoisted block and so was trivially updated. This also applies to - // everything reachable from this block so we're completely done with the - // it. + + // If this block had an immediate dominator somewhere in the chain + // we hoisted over, then its position in the domtree needs to move as it is + // reachable from a node hoisted over this chain. if (!DomChain.count(Node->getIDom())) continue; - // We need to change the IDom for this node but also walk its successors - // which could have similar dominance position. DT.changeImmediateDominator(Node, OldPHNode); - for (auto *SuccBB : successors(BB)) - Worklist.insert(SuccBB); + + // Now add this node's dominator frontier to the worklist as well. + AppendDomFrontier(Node); } } Index: llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll =================================================================== --- llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll +++ llvm/trunk/test/Transforms/SimpleLoopUnswitch/trivial-unswitch.ll @@ -382,3 +382,64 @@ ; CHECK-NEXT: %[[R:.*]] = add i32 %[[R1]], %[[R2]] ; CHECK-NEXT: ret i32 %[[R]] } + +; This test, extracted from the LLVM test suite, has an interesting dominator +; tree to update as there are edges to sibling domtree nodes within child +; domtree nodes of the unswitched node. +define void @xgets(i1 %cond1, i1* %cond2.ptr) { +; CHECK-LABEL: @xgets( +entry: + br label %for.cond.preheader +; CHECK: entry: +; CHECK-NEXT: br label %for.cond.preheader + +for.cond.preheader: + br label %for.cond +; CHECK: for.cond.preheader: +; CHECK-NEXT: br i1 %cond1, label %for.cond.preheader.split, label %if.end17.thread.loopexit +; +; CHECK: for.cond.preheader.split: +; CHECK-NEXT: br label %for.cond + +for.cond: + br i1 %cond1, label %land.lhs.true, label %if.end17.thread.loopexit +; CHECK: for.cond: +; CHECK-NEXT: br label %land.lhs.true + +land.lhs.true: + br label %if.then20 +; CHECK: land.lhs.true: +; CHECK-NEXT: br label %if.then20 + +if.then20: + %cond2 = load volatile i1, i1* %cond2.ptr + br i1 %cond2, label %if.then23, label %if.else +; CHECK: if.then20: +; CHECK-NEXT: %[[COND2:.*]] = load volatile i1, i1* %cond2.ptr +; CHECK-NEXT: br i1 %[[COND2]], label %if.then23, label %if.else + +if.else: + br label %for.cond +; CHECK: if.else: +; CHECK-NEXT: br label %for.cond + +if.end17.thread.loopexit: + br label %if.end17.thread +; CHECK: if.end17.thread.loopexit: +; CHECK-NEXT: br label %if.end17.thread + +if.end17.thread: + br label %cleanup +; CHECK: if.end17.thread: +; CHECK-NEXT: br label %cleanup + +if.then23: + br label %cleanup +; CHECK: if.then23: +; CHECK-NEXT: br label %cleanup + +cleanup: + ret void +; CHECK: cleanup: +; CHECK-NEXT: ret void +}