Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -805,10 +805,7 @@ // Now the loop blocks are cloned and the other exiting blocks from the // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the - // values from the cloned region. Also update the dominator info for - // OtherExits and their immediate successors, since we have new edges into - // OtherExits. - SmallPtrSet ImmediateSuccessorsOfExitBlocks; + // values from the cloned region. for (auto *BB : OtherExits) { for (auto &II : *BB) { @@ -843,27 +840,30 @@ "Breaks the definition of dedicated exits!"); } #endif - // Update the dominator info because the immediate dominator is no longer the - // header of the original Loop. BB has edges both from L and remainder code. - // Since the preheader determines which loop is run (L or directly jump to - // the remainder code), we set the immediate dominator as the preheader. - if (DT) { - DT->changeImmediateDominator(BB, PreHeader); - // Also update the IDom for immediate successors of BB. If the current - // IDom is the header, update the IDom to be the preheader because that is - // the nearest common dominator of all predecessors of SuccBB. We need to - // check for IDom being the header because successors of exit blocks can - // have edges from outside the loop, and we should not incorrectly update - // the IDom in that case. 
- for (BasicBlock *SuccBB: successors(BB)) - if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { - if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { - assert(!SuccBB->getSinglePredecessor() && - "BB should be the IDom then!"); - DT->changeImmediateDominator(SuccBB, PreHeader); - } - } + } + + // Update the immediate dominator of the exit blocks and blocks that are + // reachable from the exit blocks. This is needed because we now have paths + // from both the original loop and the remainder code reaching the exit + // blocks. While the IDoms of these exit blocks were from the original loop, + // now the IDom is the preheader (which decides whether the original loop or + // remainder code should run). + if (DT && !L->getExitingBlock()) { + SmallVector<BasicBlock *, 16> ChildrenToUpdate; + // NB! We have to examine the dom children of all loop blocks, not just + // those which are the IDom of the exit blocks. This is because blocks + // reachable from the exit blocks can have their IDom as the nearest common + // dominator of the exit blocks. + for (auto *BB : L->blocks()) { + auto *DomNodeBB = DT->getNode(BB); + for (auto *DomChild : DomNodeBB->getChildren()) { + auto *DomChildBB = DomChild->getBlock(); + if (!L->contains(LI->getLoopFor(DomChildBB))) + ChildrenToUpdate.push_back(DomChildBB); + } } + for (auto *BB : ChildrenToUpdate) + DT->changeImmediateDominator(BB, PreHeader); } // Loop structure should be the following: Index: test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -124,3 +124,152 @@ exitsucc: ; preds = %headerexit ret i64 96 } + +; exit block (%default) has an exiting block and another exit block as predecessors. 
+define void @test4(i16 %c3) { +; CHECK-LABEL: test4 + +; CHECK-LABEL: exiting.prol: +; CHECK-NEXT: switch i16 %c3, label %default.loopexit.loopexit1 [ + +; CHECK-LABEL: exiting: +; CHECK-NEXT: switch i16 %c3, label %default.loopexit.loopexit [ + +; CHECK-LABEL: default.loopexit.loopexit: +; CHECK-NEXT: br label %default.loopexit + +; CHECK-LABEL: default.loopexit.loopexit1: +; CHECK-NEXT: br label %default.loopexit + +; CHECK-LABEL: default.loopexit: +; CHECK-NEXT: br label %default +preheader: + %c1 = zext i32 undef to i64 + br label %header + +header: ; preds = %latch, %preheader + %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %latch ] + br label %exiting + +exiting: ; preds = %header + switch i16 %c3, label %default [ + i16 45, label %otherexit + i16 95, label %latch + ] + +latch: ; preds = %exiting + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %c2 = icmp ult i64 %indvars.iv.next, %c1 + br i1 %c2, label %header, label %latchexit + +latchexit: ; preds = %latch + ret void + +default: ; preds = %otherexit, %exiting + ret void + +otherexit: ; preds = %exiting + br label %default +} + +; exit block (%exitB) has an exiting block and another exit block as predecessors. +; exiting block comes from inner loop. 
+define void @test5() { +; CHECK-LABEL: test5 +; CHECK-LABEL: bb1: +; CHECK-NEXT: br i1 false, label %outerH.prol.preheader, label %outerH.prol.loopexit + +; CHECK-LABEL: outerH.prol.preheader: +; CHECK-NEXT: br label %outerH.prol + +; CHECK-LABEL: outerH.prol: +; CHECK-NEXT: %tmp4.prol = phi i32 [ %tmp6.prol, %outerLatch.prol ], [ undef, %outerH.prol.preheader ] +; CHECK-NEXT: %prol.iter = phi i32 [ 0, %outerH.prol.preheader ], [ %prol.iter.sub, %outerLatch.prol ] +; CHECK-NEXT: br label %innerH.prol +bb: + %tmp = icmp sgt i32 undef, 79 + br i1 %tmp, label %outerLatchExit, label %bb1 + +bb1: ; preds = %bb + br label %outerH + +outerH: ; preds = %outerLatch, %bb1 + %tmp4 = phi i32 [ %tmp6, %outerLatch ], [ undef, %bb1 ] + br label %innerH + +innerH: ; preds = %innerLatch, %outerH + br i1 undef, label %innerexiting, label %otherexitB + +innerexiting: ; preds = %innerH + br i1 undef, label %innerLatch, label %exitB + +innerLatch: ; preds = %innerexiting + %tmp13 = fcmp olt double undef, 2.000000e+00 + br i1 %tmp13, label %innerH, label %outerLatch + +outerLatch: ; preds = %innerLatch + %tmp6 = add i32 %tmp4, 1 + %tmp7 = icmp sgt i32 %tmp6, 79 + br i1 %tmp7, label %outerLatchExit, label %outerH + +outerLatchExit: ; preds = %outerLatch, %bb + ret void + +exitB: ; preds = %innerexiting, %otherexitB + ret void + +otherexitB: ; preds = %innerH + br label %exitB + +} + +; Blocks reachable from exits (not_zero44) have the IDom as the block within the loop (Header). +; Update the IDom to the preheader. 
+define void @test6() { +; CHECK-LABEL: test6 +; CHECK-LABEL: header.prol.preheader: +; CHECK-NEXT: br label %header.prol + +; CHECK-LABEL: header.prol: +; CHECK-NEXT: %indvars.iv.prol = phi i64 [ undef, %header.prol.preheader ], [ %indvars.iv.next.prol, %latch.prol ] +; CHECK-NEXT: %prol.iter = phi i64 [ 1, %header.prol.preheader ], [ %prol.iter.sub, %latch.prol ] +; CHECK-NEXT: br i1 false, label %latch.prol, label %otherexit.loopexit1 + +; CHECK-LABEL: header.prol.loopexit.unr-lcssa: +; CHECK-NEXT: %indvars.iv.unr.ph = phi i64 [ %indvars.iv.next.prol, %latch.prol ] +; CHECK-NEXT: br label %header.prol.loopexit + +; CHECK-LABEL: header.prol.loopexit: +; CHECK-NEXT: %indvars.iv.unr = phi i64 [ undef, %entry ], [ %indvars.iv.unr.ph, %header.prol.loopexit.unr-lcssa ] +; CHECK-NEXT: br i1 true, label %latchexit, label %entry.new + +; CHECK-LABEL: entry.new: +; CHECK-NEXT: br label %header +entry: + br label %header + +header: ; preds = %latch, %entry + %indvars.iv = phi i64 [ undef, %entry ], [ %indvars.iv.next, %latch ] + br i1 undef, label %latch, label %otherexit + +latch: ; preds = %header + %indvars.iv.next = add nsw i64 %indvars.iv, 2 + %0 = icmp slt i64 %indvars.iv.next, 616 + br i1 %0, label %header, label %latchexit + +latchexit: ; preds = %latch + br label %latchexitsucc + +otherexit: ; preds = %header + br label %otherexitsucc + +otherexitsucc: ; preds = %otherexit + br label %not_zero44 + +not_zero44: ; preds = %latchexitsucc, %otherexitsucc + unreachable + +latchexitsucc: ; preds = %latchexit + br label %not_zero44 +} +