Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1106,20 +1106,41 @@ return LoopUnrollResult::Unmodified; } - // Find trip count and trip multiple if count is not available + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop simplify form must have latch"); + + // Find the smallest exact constant trip count for any latch-dominating exit. + // If there is none, find the largest trip multiple instead. unsigned TripCount = 0; unsigned TripMultiple = 1; - // If there are multiple exiting blocks but one of them is the latch, use the - // latch for the trip count estimation. Otherwise insist on a single exiting - // block for the trip count estimation. - BasicBlock *ExitingBlock = L->getLoopLatch(); - if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) - ExitingBlock = L->getExitingBlock(); - if (ExitingBlock) { - TripCount = SE.getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); + BasicBlock *ExitingBlock = nullptr; + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *BB : ExitingBlocks) { + // Only unroll against latch-dominating exit blocks. + if (!DT.dominates(BB, Latch)) + continue; + + if (unsigned TC = SE.getSmallConstantTripCount(L, BB)) { + if (!TripCount || TC < TripCount) { + TripMultiple = TripCount = TC; + ExitingBlock = BB; + } + } + + if (!TripCount) { + unsigned TM = SE.getSmallConstantTripMultiple(L, BB); + if (TM > TripMultiple) { + TripMultiple = TM; + ExitingBlock = BB; + } + } } + // For runtime unrolling, fall back to using the latch exit. 
+ if (!ExitingBlock && L->isLoopExiting(Latch)) + ExitingBlock = Latch; + // If the loop contains a convergent operation, the prelude we'd add // to do the first few instructions before we hit the unrolled loop // is unsafe -- it adds a control-flow dependency to the convergent Index: llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll +++ llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll @@ -20,20 +20,30 @@ ; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 2 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0 ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ true, [[LOOP_2:%.*]] ] ; CHECK-NEXT: ret i1 [[EXIT_VAL]] +; 
CHECK: loop.1: +; CHECK-NEXT: br label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1 +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1 +; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8 +; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8 +; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]] +; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LATCH_2:%.*]] +; CHECK: latch.2: +; CHECK-NEXT: unreachable ; start: %a1 = alloca [2 x i64], align 8 Index: llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll +++ llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll @@ -165,43 +165,35 @@ ; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br label [[FOR_HEADER:%.*]] ; CHECK: for.header: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]] ] -; CHECK-NEXT: call void @bar(i32 [[TMP1]]) +; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[I]], 1 -; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]] ; CHECK: for.body.for.body_crit_edge: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* 
[[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; CHECK: for.body.1: -; CHECK-NEXT: [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1 -; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]] ; CHECK: for.body.for.body_crit_edge.1: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_1]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]] ; CHECK: for.body.2: -; CHECK-NEXT: [[INC_2:%.*]] = add nuw nsw i64 [[INC_1]], 1 -; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]], label [[FOR_END]] +; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]] ; CHECK: for.body.for.body_crit_edge.2: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_2]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]] ; CHECK: for.body.3: -; CHECK-NEXT: [[INC_3]] = add nuw nsw i64 [[INC_2]], 1 -; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]], label [[FOR_END]] +; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.3: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]] -; CHECK-NEXT: 
[[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4 -; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: unreachable ; entry: %0 = load i32, i32* %A, align 4 Index: llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll +++ llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll @@ -10,20 +10,60 @@ ; CHECK-NEXT: start: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 24 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT_4:%.*]], [[LATCH_4:%.*]] ] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A1:%.*]], i64 [[IV]] ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A2:%.*]], i64 [[IV]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ false, [[LATCH_2:%.*]] ], [ false, [[LATCH_3:%.*]] ], [ true, [[LOOP_4:%.*]] ], [ false, [[LATCH_4]] ] ; CHECK-NEXT: ret i1 [[EXIT_VAL]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: 
[[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8 +; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8 +; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]] +; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: br label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[LOAD1_2:%.*]] = load i64, i64* [[GEP1_2]], align 8 +; CHECK-NEXT: [[LOAD2_2:%.*]] = load i64, i64* [[GEP2_2]], align 8 +; CHECK-NEXT: [[EXITCOND2_2:%.*]] = icmp eq i64 [[LOAD1_2]], [[LOAD2_2]] +; CHECK-NEXT: br i1 [[EXITCOND2_2]], label [[LOOP_3:%.*]], label [[EXIT]] +; CHECK: loop.3: +; CHECK-NEXT: br label [[LATCH_3]] +; CHECK: latch.3: +; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[LOAD1_3:%.*]] = load i64, i64* [[GEP1_3]], align 8 +; CHECK-NEXT: [[LOAD2_3:%.*]] = load i64, i64* [[GEP2_3]], align 8 +; CHECK-NEXT: [[EXITCOND2_3:%.*]] = icmp eq i64 [[LOAD1_3]], [[LOAD2_3]] +; CHECK-NEXT: br i1 [[EXITCOND2_3]], label [[LOOP_4]], label [[EXIT]] +; CHECK: loop.4: +; CHECK-NEXT: [[EXITCOND_4:%.*]] = icmp eq i64 [[IV_NEXT_3]], 24 +; CHECK-NEXT: br i1 [[EXITCOND_4]], label [[EXIT]], label [[LATCH_4]] +; CHECK: latch.4: +; CHECK-NEXT: [[IV_NEXT_4]] = add nuw nsw i64 [[IV_NEXT_3]], 1 +; CHECK-NEXT: [[GEP1_4:%.*]] = 
getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[GEP2_4:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[LOAD1_4:%.*]] = load i64, i64* [[GEP1_4]], align 8 +; CHECK-NEXT: [[LOAD2_4:%.*]] = load i64, i64* [[GEP2_4]], align 8 +; CHECK-NEXT: [[EXITCOND2_4:%.*]] = icmp eq i64 [[LOAD1_4]], [[LOAD2_4]] +; CHECK-NEXT: br i1 [[EXITCOND2_4]], label [[LOOP]], label [[EXIT]] ; start: br label %loop Index: llvm/test/Transforms/LoopUnroll/rebuild_lcssa.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/rebuild_lcssa.ll +++ llvm/test/Transforms/LoopUnroll/rebuild_lcssa.ll @@ -35,10 +35,12 @@ ; CHECK-NEXT: store i64 [[Y1_LCSSA]], i64* undef, align 8 ; CHECK-NEXT: br i1 false, label [[L3_LATCH:%.*]], label [[L1_LATCH:%.*]] ; CHECK: L3_latch: -; CHECK-NEXT: ret void +; CHECK-NEXT: unreachable ; CHECK: L1_latch: ; CHECK-NEXT: [[Y_LCSSA:%.*]] = phi i64 [ [[Y1_LCSSA]], [[L3_BODY]] ] ; CHECK-NEXT: br label [[L1_HEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %L1_header @@ -96,6 +98,8 @@ ; CHECK-NEXT: [[B_LCSSA:%.*]] = phi i64 [ [[A_LCSSA]], [[L3_BREAK_TO_L1]] ] ; CHECK-NEXT: br label [[L1_HEADER]] ; CHECK: L3_latch: +; CHECK-NEXT: unreachable +; CHECK: Exit: ; CHECK-NEXT: ret void ; entry: @@ -139,15 +143,16 @@ ; CHECK: L2_header: ; CHECK-NEXT: br i1 false, label [[L2_LATCH:%.*]], label [[L1_LATCH_LOOPEXIT:%.*]] ; CHECK: L2_latch: -; CHECK-NEXT: [[A_LCSSA:%.*]] = phi i8* [ [[A]], [[L2_HEADER]] ] -; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK-NEXT: unreachable ; CHECK: L1_latch.loopexit: ; CHECK-NEXT: br label [[L1_LATCH]] ; CHECK: L1_latch: ; CHECK-NEXT: [[B]] = phi i8* [ undef, [[L1_HEADER]] ], [ null, [[L1_LATCH_LOOPEXIT]] ] ; CHECK-NEXT: br label [[L1_HEADER]] +; CHECK: L2_exit: +; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: Exit: -; CHECK-NEXT: [[A_LCSSA2:%.*]] = phi i8* [ [[A_LCSSA]], [[L2_LATCH]] ] +; CHECK-NEXT: [[A_LCSSA2:%.*]] = phi i8* [ 
undef, [[L2_EXIT:%.*]] ] ; CHECK-NEXT: ret void ; entry: @@ -195,9 +200,11 @@ ; CHECK-NEXT: [[X_LCSSA:%.*]] = phi i1 [ false, [[L3_EXITING]] ] ; CHECK-NEXT: br i1 [[X_LCSSA]], label [[L3_LATCH:%.*]], label [[L3_LATCH]] ; CHECK: L3_latch: -; CHECK-NEXT: ret i8 0 +; CHECK-NEXT: unreachable ; CHECK: L1_latch: ; CHECK-NEXT: unreachable +; CHECK: exit: +; CHECK-NEXT: ret i8 0 ; entry: br label %L1_header Index: llvm/test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -60,30 +60,43 @@ ; SCEV unrolling properly handles loops with multiple exits. In this ; case, the computed trip count based on a canonical IV is *not* for a -; latch block. Canonical unrolling incorrectly unrolls it, but SCEV -; unrolling does not. +; latch block. define i64 @earlyLoopTest(i64* %base) nounwind { ; CHECK-LABEL: @earlyLoopTest( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ] -; CHECK-NEXT: [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ] -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 4 -; CHECK-NEXT: [[S_NEXT]] = add i64 [[S]], [[VAL]] -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[INC]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4 +; CHECK-NEXT: br label [[TAIL:%.*]] ; CHECK: tail: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]] ; CHECK: exit1: -; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ] +; CHECK-NEXT: [[S_LCSSA:%.*]] = phi 
i64 [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ] ; CHECK-NEXT: ret i64 [[S_LCSSA]] ; CHECK: exit2: -; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ] +; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1:%.*]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ] ; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]] +; CHECK: loop.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4 +; CHECK-NEXT: [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]] +; CHECK-NEXT: br label [[TAIL_1]] +; CHECK: tail.1: +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]] +; CHECK: loop.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2 +; CHECK-NEXT: [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4 +; CHECK-NEXT: [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]] +; CHECK-NEXT: br label [[TAIL_2]] +; CHECK: tail.2: +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]] +; CHECK: loop.3: +; CHECK-NEXT: br i1 false, label [[TAIL_3:%.*]], label [[EXIT1:%.*]] +; CHECK: tail.3: +; CHECK-NEXT: unreachable ; entry: br label %loop @@ -115,18 +128,13 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: br i1 false, label [[L2]], label [[EXIT1:%.*]] +; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L1]], label [[EXIT2:%.*]] +; CHECK-NEXT: unreachable ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 ; CHECK: exit2: -; CHECK-NEXT: [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], 
[[L2]] ] -; CHECK-NEXT: ret i32 [[VAL_LCSSA1]] +; CHECK-NEXT: ret i32 undef ; entry: br label %l1 @@ -149,32 +157,69 @@ } -; SCEV should not unroll a multi-exit loops unless the latch block has -; a known trip count, regardless of the early exit trip counts. The -; LoopUnroll utility uses this assumption to optimize the latch -; block's branch. +; SCEV can unroll a multi-exit loop even if the latch block has no +; known trip count, but an early exit has a known trip count. In this +; case we must be careful not to optimize the latch branch away. define i32 @multiExitIncomplete(i32* %base) nounwind { ; CHECK-LABEL: @multiExitIncomplete( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5 -; CHECK-NEXT: br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 +; CHECK-NEXT: br label [[L2:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L3]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 true, label [[L3:%.*]], label [[EXIT2:%.*]] ; CHECK: l3: ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 ; CHECK: exit2: ; CHECK-NEXT: ret i32 2 ; CHECK: exit3: ; CHECK-NEXT: ret i32 3 +; CHECK: l1.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4 +; CHECK-NEXT: br label [[L2_1:%.*]] +; CHECK: l2.1: +; CHECK-NEXT: br i1 true, label [[L3_1:%.*]], label [[EXIT2]] +; CHECK: l3.1: +; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne 
i32 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]] +; CHECK: l1.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2 +; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4 +; CHECK-NEXT: br label [[L2_2:%.*]] +; CHECK: l2.2: +; CHECK-NEXT: br i1 true, label [[L3_2:%.*]], label [[EXIT2]] +; CHECK: l3.2: +; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]] +; CHECK: l1.3: +; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3 +; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4 +; CHECK-NEXT: br label [[L2_3:%.*]] +; CHECK: l2.3: +; CHECK-NEXT: br i1 true, label [[L3_3:%.*]], label [[EXIT2]] +; CHECK: l3.3: +; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0 +; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]] +; CHECK: l1.4: +; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4 +; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4 +; CHECK-NEXT: br label [[L2_4:%.*]] +; CHECK: l2.4: +; CHECK-NEXT: br i1 true, label [[L3_4:%.*]], label [[EXIT2]] +; CHECK: l3.4: +; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0 +; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]] +; CHECK: l1.5: +; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] +; CHECK: l2.5: +; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2]] +; CHECK: l3.5: +; CHECK-NEXT: unreachable ; entry: br label %l1 @@ -263,17 +308,18 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[B_03]], 8 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]] +; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: 
br i1 false, label [[RETURN]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]] ; CHECK: return: -; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] +; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void +; CHECK: for.body.1: +; CHECK-NEXT: br i1 false, label [[FOR_COND_1:%.*]], label [[RETURN]] +; CHECK: for.cond.1: +; CHECK-NEXT: unreachable ; entry: br label %for.body Index: llvm/test/Transforms/LoopUnroll/unloop.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unloop.ll +++ llvm/test/Transforms/LoopUnroll/unloop.ll @@ -18,9 +18,11 @@ ; CHECK-NEXT: [[TMP0:%.*]] = call zeroext i1 @check() ; CHECK-NEXT: br i1 true, label [[OUTER_BACKEDGE:%.*]], label [[TAIL:%.*]] ; CHECK: tail: -; CHECK-NEXT: ret void +; CHECK-NEXT: unreachable ; CHECK: outer.backedge: ; CHECK-NEXT: br label [[OUTER]] +; CHECK: exit: +; CHECK-NEXT: ret void ; entry: br label %outer @@ -483,7 +485,7 @@ ; CHECK: for.cond.i: ; CHECK-NEXT: br label [[FOR_COND_I]] ; CHECK: Proc2.exit: -; CHECK-NEXT: br label [[FOR_COND31]] +; CHECK-NEXT: unreachable ; CHECK: for.end94: ; CHECK-NEXT: ret void ; @@ -559,7 +561,7 @@ ; CHECK: while.body1694: ; CHECK-NEXT: unreachable ; CHECK: while.end1699: -; CHECK-NEXT: br label [[SW_DEFAULT1711]] +; CHECK-NEXT: unreachable ; CHECK: sw.default1711: ; CHECK-NEXT: br label [[DEFCHAR:%.*]] ; CHECK: defchar: @@ -632,6 +634,8 @@ ; CHECK: for.cond.i: ; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[FOR_COND_I]], label [[FOR_COND1_LOOPEXIT:%.*]] ; CHECK: for.cond3: +; CHECK-NEXT: unreachable +; CHECK: if.end: ; CHECK-NEXT: ret void ; entry: Index: 
llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll +++ llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll @@ -8,25 +8,53 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 123, [[ENTRY:%.*]] ], [ [[RES_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC9:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]] -; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[PTR]], align 2 -; CHECK-NEXT: [[RES_NEXT]] = add i16 [[RES]], [[LV]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_0]], 3 -; CHECK-NEXT: br i1 [[CMP]], label [[EXITING_1:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[RES_NEXT:%.*]] = add i16 123, [[LV]] +; CHECK-NEXT: br label [[EXITING_1:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i16 [[LV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[EXITING_2:%.*]] +; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT:%.*]], label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i16 [[LV]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH]] +; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[INC9]] = add i64 [[I_0]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1 +; CHECK-NEXT: [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2 +; CHECK-NEXT: [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]] +; CHECK-NEXT: br label [[EXITING_1_1:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], 
[ 1, [[EXITING_2]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ] ; CHECK-NEXT: ret i16 [[RES_LCSSA]] +; CHECK: exiting.1.1: +; CHECK-NEXT: [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]] +; CHECK: exiting.2.1: +; CHECK-NEXT: [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1:%.*]] +; CHECK: latch.1: +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2 +; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2 +; CHECK-NEXT: [[RES_NEXT_2:%.*]] = add i16 [[RES_NEXT_1]], [[LV_2]] +; CHECK-NEXT: br label [[EXITING_1_2]] +; CHECK: exiting.1.2: +; CHECK-NEXT: [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]] +; CHECK: exiting.2.2: +; CHECK-NEXT: [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3 +; CHECK-NEXT: [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2 +; CHECK-NEXT: [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], [[LV_3]] +; CHECK-NEXT: br i1 false, label [[EXITING_1_3]], label [[EXIT]] +; CHECK: exiting.1.3: +; CHECK-NEXT: [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]] +; CHECK: exiting.2.3: +; CHECK-NEXT: [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]] +; CHECK: latch.3: +; CHECK-NEXT: unreachable ; entry: br label %header