Index: llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/trunk/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -22,6 +22,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/LoopPass.h" @@ -736,7 +737,9 @@ // remainder are connected to the original Loop's exit blocks. The remaining // work is to update the phi nodes in the original loop, and take in the // values from the cloned region. Also update the dominator info for - // OtherExits, since we have new edges into OtherExits. + // OtherExits and their immediate successors, since we have new edges into + // OtherExits. + SmallSet ImmediateSuccessorsOfExitBlocks; for (auto *BB : OtherExits) { for (auto &II : *BB) { @@ -759,12 +762,35 @@ cast(VMap[Phi->getIncomingBlock(i)])); } } +#ifdef EXPENSIVE_CHECKS + for (BasicBlock *SuccBB : successors(BB)) { + assert(!(any_of(OtherExits, + [SuccBB](BasicBlock *EB) { return EB == SuccBB; }) || + SuccBB == LatchExit) && + "Breaks the definition of dedicated exits!"); + } +#endif // Update the dominator info because the immediate dominator is no longer the // header of the original Loop. BB has edges both from L and remainder code. // Since the preheader determines which loop is run (L or directly jump to // the remainder code), we set the immediate dominator as the preheader. - if (DT) + if (DT) { DT->changeImmediateDominator(BB, PreHeader); + // Also update the IDom for immediate successors of BB. If the current + // IDom is the header, update the IDom to be the preheader because that is + // the nearest common dominator of all predecessors of SuccBB. We need to + // check for IDom being the header because successors of exit blocks can + // have edges from outside the loop, and we should not incorrectly update + // the IDom in that case. + for (BasicBlock *SuccBB: successors(BB)) + if (ImmediateSuccessorsOfExitBlocks.insert(SuccBB).second) { + if (DT->getNode(SuccBB)->getIDom()->getBlock() == Header) { + assert(!SuccBB->getSinglePredecessor() && + "BB should be the IDom then!"); + DT->changeImmediateDominator(SuccBB, PreHeader); + } + } + } } // Loop structure should be the following: Index: llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll =================================================================== --- llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll +++ llvm/trunk/test/Transforms/LoopUnroll/runtime-loop-multiexit-dom-verify.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false -unroll-runtime-multi-exit=true -unroll-count=4 -verify-dom-info -S | FileCheck %s + +; REQUIRES: asserts +; The tests below are for verifying dom tree after runtime unrolling +; with multiple exit/exiting blocks. + +; We explicitly set the unroll count so that expensiveTripCount computation is allowed. + +; mergedexit block has edges from loop exit blocks. +define i64 @test1() { +; CHECK-LABEL: test1( +; CHECK-LABEL: headerexit: +; CHECK-NEXT: %addphi = phi i64 [ %add.iv, %header ], [ %add.iv.1, %header.1 ], [ %add.iv.2, %header.2 ], [ %add.iv.3, %header.3 ] +; CHECK-NEXT: br label %mergedexit +; CHECK-LABEL: latchexit: +; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ] +; CHECK-NEXT: br label %mergedexit +; CHECK-LABEL: mergedexit: +; CHECK-NEXT: %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ] +; CHECK-NEXT: ret i64 %retval +entry: + br label %preheader + +preheader: ; preds = %bb + %trip = zext i32 undef to i64 + br label %header + +header: ; preds = %latch, %preheader + %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ] + %add.iv = add nuw nsw i64 %iv, 2 + %cmp1 = icmp ult i64 %add.iv, %trip + br i1 %cmp1, label %latch, label %headerexit + +latch: ; preds = %header + %shft = ashr i64 %add.iv, 1 + %cmp2 = icmp ult i64 %shft, %trip + br i1 %cmp2, label %header, label %latchexit + +headerexit: ; preds = %header + %addphi = phi i64 [ %add.iv, %header ] + br label %mergedexit + +latchexit: ; preds = %latch + %shftphi = phi i64 [ %shft, %latch ] + br label %mergedexit + +mergedexit: ; preds = %latchexit, %headerexit + %retval = phi i64 [ %addphi, %headerexit ], [ %shftphi, %latchexit ] + ret i64 %retval +} + +; mergedexit has edges from loop exit blocks and a block outside the loop. +define void @test2(i1 %cond, i32 %n) { +; CHECK-LABEL: header.1: +; CHECK-NEXT: %add.iv.1 = add nuw nsw i64 %add.iv, 2 +; CHECK: br i1 %cmp1.1, label %latch.1, label %headerexit +; CHECK-LABEL: latch.3: +; CHECK: %cmp2.3 = icmp ult i64 %shft.3, %trip +; CHECK-NEXT: br i1 %cmp2.3, label %header, label %latchexit, !llvm.loop +entry: + br i1 %cond, label %preheader, label %mergedexit + +preheader: ; preds = %entry + %trip = zext i32 %n to i64 + br label %header + +header: ; preds = %latch, %preheader + %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ] + %add.iv = add nuw nsw i64 %iv, 2 + %cmp1 = icmp ult i64 %add.iv, %trip + br i1 %cmp1, label %latch, label %headerexit + +latch: ; preds = %header + %shft = ashr i64 %add.iv, 1 + %cmp2 = icmp ult i64 %shft, %trip + br i1 %cmp2, label %header, label %latchexit + +headerexit: ; preds = %header + br label %mergedexit + +latchexit: ; preds = %latch + br label %mergedexit + +mergedexit: ; preds = %latchexit, %headerexit, %entry + ret void +} + + +; exitsucc is from loop exit block only. +define i64 @test3(i32 %n) { +; CHECK-LABEL: test3( +; CHECK-LABEL: headerexit: +; CHECK-NEXT: br label %exitsucc +; CHECK-LABEL: latchexit: +; CHECK-NEXT: %shftphi = phi i64 [ %shft, %latch ], [ %shft.1, %latch.1 ], [ %shft.2, %latch.2 ], [ %shft.3, %latch.3 ] +; CHECK-NEXT: ret i64 %shftphi +; CHECK-LABEL: exitsucc: +; CHECK-NEXT: ret i64 96 +entry: + br label %preheader + +preheader: ; preds = %bb + %trip = zext i32 %n to i64 + br label %header + +header: ; preds = %latch, %preheader + %iv = phi i64 [ 2, %preheader ], [ %add.iv, %latch ] + %add.iv = add nuw nsw i64 %iv, 2 + %cmp1 = icmp ult i64 %add.iv, %trip + br i1 %cmp1, label %latch, label %headerexit + +latch: ; preds = %header + %shft = ashr i64 %add.iv, 1 + %cmp2 = icmp ult i64 %shft, %trip + br i1 %cmp2, label %header, label %latchexit + +headerexit: ; preds = %header + br label %exitsucc + +latchexit: ; preds = %latch + %shftphi = phi i64 [ %shft, %latch ] + ret i64 %shftphi + +exitsucc: ; preds = %headerexit + ret i64 96 +}