Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1120,18 +1120,27 @@ return LoopUnrollResult::Unmodified; } - // Find trip count and trip multiple if count is not available + // Find the smallest exact trip count for any exit. An unroll by this trip + // count will eliminate at least one exit, but there may be additional exits + // beyond it. unsigned TripCount = 0; unsigned TripMultiple = 1; - // If there are multiple exiting blocks but one of them is the latch, use the - // latch for the trip count estimation. Otherwise insist on a single exiting - // block for the trip count estimation. - BasicBlock *ExitingBlock = L->getLoopLatch(); - if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) - ExitingBlock = L->getExitingBlock(); - if (ExitingBlock) { - TripCount = SE.getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) + if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock)) + if (!TripCount || TC < TripCount) + TripCount = TripMultiple = TC; + + if (!TripCount) { + // If no exact trip count is known, determine the trip multiple of either + // the loop latch or the single exiting block. + // TODO: Relax for multiple exits. 
+ BasicBlock *ExitingBlock = L->getLoopLatch(); + if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) + ExitingBlock = L->getExitingBlock(); + if (ExitingBlock) + TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); } // If the loop contains a convergent operation, the prelude we'd add Index: llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll +++ llvm/test/Transforms/LoopUnroll/full-unroll-one-unpredictable-exit.ll @@ -87,20 +87,30 @@ ; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 2 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 [[IV]] +; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0 ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ true, 
[[LOOP_2:%.*]] ], [ false, [[LATCH_2:%.*]] ] ; CHECK-NEXT: ret i1 [[EXIT_VAL]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1 +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1 +; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8 +; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8 +; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]] +; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: br label [[EXIT]] ; start: %a1 = alloca [2 x i64], align 8 Index: llvm/test/Transforms/LoopUnroll/multiple-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/multiple-exits.ll +++ llvm/test/Transforms/LoopUnroll/multiple-exits.ll @@ -72,24 +72,96 @@ ret void } -; TODO: We should fully unroll this by 10, leave the unrolled latch +; Fully unroll this loop by 10, but leave the unrolled latch ; tests since we don't know if %N < 10, and break the backedge. 
define void @test2(i64 %N) { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: [[CMP1:%.*]] = icmp ule i64 [[IV]], 10 -; CHECK-NEXT: br i1 [[CMP1]], label [[LATCH]], label [[EXIT:%.*]] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: ; CHECK-NEXT: call void @bar() -; CHECK-NEXT: [[CMP2:%.*]] = icmp ule i64 [[IV]], [[N:%.*]] -; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 true, label [[LOOP_1:%.*]], label [[EXIT:%.*]] ; CHECK: exit: ; CHECK-NEXT: ret void +; CHECK: loop.1: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_1:%.*]] +; CHECK: latch.1: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ule i64 1, [[N:%.*]] +; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_2:%.*]] +; CHECK: latch.2: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ule i64 2, [[N]] +; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3:%.*]], label [[EXIT]] +; CHECK: loop.3: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_3:%.*]] +; CHECK: latch.3: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_3:%.*]] = icmp ule i64 3, [[N]] +; CHECK-NEXT: br i1 [[CMP2_3]], label [[LOOP_4:%.*]], label [[EXIT]] +; CHECK: loop.4: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_4:%.*]] +; CHECK: latch.4: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_4:%.*]] = icmp ule i64 4, [[N]] +; CHECK-NEXT: br i1 [[CMP2_4]], label [[LOOP_5:%.*]], label [[EXIT]] +; CHECK: loop.5: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_5:%.*]] +; CHECK: latch.5: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_5:%.*]] = icmp ule i64 5, [[N]] +; 
CHECK-NEXT: br i1 [[CMP2_5]], label [[LOOP_6:%.*]], label [[EXIT]] +; CHECK: loop.6: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_6:%.*]] +; CHECK: latch.6: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_6:%.*]] = icmp ule i64 6, [[N]] +; CHECK-NEXT: br i1 [[CMP2_6]], label [[LOOP_7:%.*]], label [[EXIT]] +; CHECK: loop.7: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_7:%.*]] +; CHECK: latch.7: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_7:%.*]] = icmp ule i64 7, [[N]] +; CHECK-NEXT: br i1 [[CMP2_7]], label [[LOOP_8:%.*]], label [[EXIT]] +; CHECK: loop.8: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_8:%.*]] +; CHECK: latch.8: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_8:%.*]] = icmp ule i64 8, [[N]] +; CHECK-NEXT: br i1 [[CMP2_8]], label [[LOOP_9:%.*]], label [[EXIT]] +; CHECK: loop.9: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_9:%.*]] +; CHECK: latch.9: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_9:%.*]] = icmp ule i64 9, [[N]] +; CHECK-NEXT: br i1 [[CMP2_9]], label [[LOOP_10:%.*]], label [[EXIT]] +; CHECK: loop.10: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[LATCH_10:%.*]] +; CHECK: latch.10: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: [[CMP2_10:%.*]] = icmp ule i64 10, [[N]] +; CHECK-NEXT: br i1 [[CMP2_10]], label [[LOOP_11:%.*]], label [[EXIT]] +; CHECK: loop.11: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br i1 false, label [[LATCH_11:%.*]], label [[EXIT]] +; CHECK: latch.11: +; CHECK-NEXT: call void @bar() +; CHECK-NEXT: br label [[EXIT]] ; entry: br label %loop Index: llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll +++ llvm/test/Transforms/LoopUnroll/partial-unroll-non-latch-exit.ll @@ -10,20 +10,60 @@ ; CHECK-NEXT: start: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; 
CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 24 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT_4:%.*]], [[LATCH_4:%.*]] ] +; CHECK-NEXT: br label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1 ; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i64, i64* [[A1:%.*]], i64 [[IV]] ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds i64, i64* [[A2:%.*]], i64 [[IV]] ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ false, [[LATCH_1:%.*]] ], [ false, [[LATCH_2:%.*]] ], [ false, [[LATCH_3:%.*]] ], [ true, [[LOOP_4:%.*]] ], [ false, [[LATCH_4]] ] ; CHECK-NEXT: ret i1 [[EXIT_VAL]] +; CHECK: loop.1: +; CHECK-NEXT: br label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV_NEXT]], 1 +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT]] +; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8 +; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8 +; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]] +; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2:%.*]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: br label [[LATCH_2]] +; CHECK: latch.2: +; 
CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV_NEXT_1]], 1 +; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_1]] +; CHECK-NEXT: [[LOAD1_2:%.*]] = load i64, i64* [[GEP1_2]], align 8 +; CHECK-NEXT: [[LOAD2_2:%.*]] = load i64, i64* [[GEP2_2]], align 8 +; CHECK-NEXT: [[EXITCOND2_2:%.*]] = icmp eq i64 [[LOAD1_2]], [[LOAD2_2]] +; CHECK-NEXT: br i1 [[EXITCOND2_2]], label [[LOOP_3:%.*]], label [[EXIT]] +; CHECK: loop.3: +; CHECK-NEXT: br label [[LATCH_3]] +; CHECK: latch.3: +; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV_NEXT_2]], 1 +; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_2]] +; CHECK-NEXT: [[LOAD1_3:%.*]] = load i64, i64* [[GEP1_3]], align 8 +; CHECK-NEXT: [[LOAD2_3:%.*]] = load i64, i64* [[GEP2_3]], align 8 +; CHECK-NEXT: [[EXITCOND2_3:%.*]] = icmp eq i64 [[LOAD1_3]], [[LOAD2_3]] +; CHECK-NEXT: br i1 [[EXITCOND2_3]], label [[LOOP_4]], label [[EXIT]] +; CHECK: loop.4: +; CHECK-NEXT: [[EXITCOND_4:%.*]] = icmp eq i64 [[IV_NEXT_3]], 24 +; CHECK-NEXT: br i1 [[EXITCOND_4]], label [[EXIT]], label [[LATCH_4]] +; CHECK: latch.4: +; CHECK-NEXT: [[IV_NEXT_4]] = add nuw nsw i64 [[IV_NEXT_3]], 1 +; CHECK-NEXT: [[GEP1_4:%.*]] = getelementptr inbounds i64, i64* [[A1]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[GEP2_4:%.*]] = getelementptr inbounds i64, i64* [[A2]], i64 [[IV_NEXT_3]] +; CHECK-NEXT: [[LOAD1_4:%.*]] = load i64, i64* [[GEP1_4]], align 8 +; CHECK-NEXT: [[LOAD2_4:%.*]] = load i64, i64* [[GEP2_4]], align 8 +; CHECK-NEXT: [[EXITCOND2_4:%.*]] = icmp eq i64 [[LOAD1_4]], [[LOAD2_4]] +; CHECK-NEXT: br i1 [[EXITCOND2_4]], label [[LOOP]], label [[EXIT]] ; start: br label %loop Index: llvm/test/Transforms/LoopUnroll/runtime-loop-known-exit.ll =================================================================== --- 
llvm/test/Transforms/LoopUnroll/runtime-loop-known-exit.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-known-exit.ll @@ -10,80 +10,19 @@ ; CHECK-LABEL: @test( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[N2:%.*]] = add i32 [[S:%.*]], 123 -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], [[S]] -; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[N]], [[S]] -; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP1]], 7 -; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LOOP_PROL_PREHEADER:%.*]], label [[LOOP_PROL_LOOPEXIT:%.*]] -; CHECK: loop.prol.preheader: -; CHECK-NEXT: br label [[LOOP_PROL:%.*]] -; CHECK: loop.prol: -; CHECK-NEXT: [[I_PROL:%.*]] = phi i32 [ [[S]], [[LOOP_PROL_PREHEADER]] ], [ [[I_INC_PROL:%.*]], [[LATCH_PROL:%.*]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i32 [ [[XTRAITER]], [[LOOP_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ] -; CHECK-NEXT: [[C1_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N2]] -; CHECK-NEXT: br i1 [[C1_PROL]], label [[EXIT1_LOOPEXIT1:%.*]], label [[LATCH_PROL]] -; CHECK: latch.prol: -; CHECK-NEXT: [[C2_PROL:%.*]] = icmp eq i32 [[I_PROL]], [[N]] -; CHECK-NEXT: [[I_INC_PROL]] = add i32 [[I_PROL]], 1 -; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i32 [[PROL_ITER]], 1 -; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i32 [[PROL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[LOOP_PROL]], label [[LOOP_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; CHECK: loop.prol.loopexit.unr-lcssa: -; CHECK-NEXT: [[I_UNR_PH:%.*]] = phi i32 [ [[I_INC_PROL]], [[LATCH_PROL]] ] -; CHECK-NEXT: br label [[LOOP_PROL_LOOPEXIT]] -; CHECK: loop.prol.loopexit: -; CHECK-NEXT: [[I_UNR:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_UNR_PH]], [[LOOP_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP2]], 7 -; CHECK-NEXT: br i1 [[TMP3]], label [[EXIT2:%.*]], label [[ENTRY_NEW:%.*]] -; CHECK: entry.new: ; CHECK-NEXT: br label 
[[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[I_UNR]], [[ENTRY_NEW]] ], [ [[I_INC_7:%.*]], [[LATCH_7:%.*]] ] +; CHECK-NEXT: [[I:%.*]] = phi i32 [ [[S]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LATCH:%.*]] ] ; CHECK-NEXT: [[C1:%.*]] = icmp eq i32 [[I]], [[N2]] -; CHECK-NEXT: br i1 [[C1]], label [[EXIT1_LOOPEXIT:%.*]], label [[LATCH:%.*]] +; CHECK-NEXT: br i1 [[C1]], label [[EXIT1:%.*]], label [[LATCH]] ; CHECK: latch: -; CHECK-NEXT: [[I_INC:%.*]] = add i32 [[I]], 1 -; CHECK-NEXT: [[C1_1:%.*]] = icmp eq i32 [[I_INC]], [[N2]] -; CHECK-NEXT: br i1 [[C1_1]], label [[EXIT1_LOOPEXIT]], label [[LATCH_1:%.*]] -; CHECK: exit1.loopexit: -; CHECK-NEXT: br label [[EXIT1:%.*]] -; CHECK: exit1.loopexit1: -; CHECK-NEXT: br label [[EXIT1]] +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32 [[I]], [[N:%.*]] +; CHECK-NEXT: [[I_INC]] = add i32 [[I]], 1 +; CHECK-NEXT: br i1 [[C2]], label [[EXIT2:%.*]], label [[LOOP]] ; CHECK: exit1: ; CHECK-NEXT: ret void -; CHECK: exit2.unr-lcssa: -; CHECK-NEXT: br label [[EXIT2]] ; CHECK: exit2: ; CHECK-NEXT: ret void -; CHECK: latch.1: -; CHECK-NEXT: [[I_INC_1:%.*]] = add i32 [[I_INC]], 1 -; CHECK-NEXT: [[C1_2:%.*]] = icmp eq i32 [[I_INC_1]], [[N2]] -; CHECK-NEXT: br i1 [[C1_2]], label [[EXIT1_LOOPEXIT]], label [[LATCH_2:%.*]] -; CHECK: latch.2: -; CHECK-NEXT: [[I_INC_2:%.*]] = add i32 [[I_INC_1]], 1 -; CHECK-NEXT: [[C1_3:%.*]] = icmp eq i32 [[I_INC_2]], [[N2]] -; CHECK-NEXT: br i1 [[C1_3]], label [[EXIT1_LOOPEXIT]], label [[LATCH_3:%.*]] -; CHECK: latch.3: -; CHECK-NEXT: [[I_INC_3:%.*]] = add i32 [[I_INC_2]], 1 -; CHECK-NEXT: [[C1_4:%.*]] = icmp eq i32 [[I_INC_3]], [[N2]] -; CHECK-NEXT: br i1 [[C1_4]], label [[EXIT1_LOOPEXIT]], label [[LATCH_4:%.*]] -; CHECK: latch.4: -; CHECK-NEXT: [[I_INC_4:%.*]] = add i32 [[I_INC_3]], 1 -; CHECK-NEXT: [[C1_5:%.*]] = icmp eq i32 [[I_INC_4]], [[N2]] -; CHECK-NEXT: br i1 [[C1_5]], label [[EXIT1_LOOPEXIT]], label [[LATCH_5:%.*]] -; CHECK: latch.5: -; CHECK-NEXT: [[I_INC_5:%.*]] = add i32 [[I_INC_4]], 1 -; 
CHECK-NEXT: [[C1_6:%.*]] = icmp eq i32 [[I_INC_5]], [[N2]] -; CHECK-NEXT: br i1 [[C1_6]], label [[EXIT1_LOOPEXIT]], label [[LATCH_6:%.*]] -; CHECK: latch.6: -; CHECK-NEXT: [[I_INC_6:%.*]] = add i32 [[I_INC_5]], 1 -; CHECK-NEXT: [[C1_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N2]] -; CHECK-NEXT: br i1 [[C1_7]], label [[EXIT1_LOOPEXIT]], label [[LATCH_7]] -; CHECK: latch.7: -; CHECK-NEXT: [[C2_7:%.*]] = icmp eq i32 [[I_INC_6]], [[N]] -; CHECK-NEXT: [[I_INC_7]] = add i32 [[I_INC_6]], 1 -; CHECK-NEXT: br i1 [[C2_7]], label [[EXIT2_UNR_LCSSA:%.*]], label [[LOOP]] ; entry: %n2 = add i32 %s, 123 Index: llvm/test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -60,30 +60,46 @@ ; SCEV unrolling properly handles loops with multiple exits. In this ; case, the computed trip count based on a canonical IV is *not* for a -; latch block. Canonical unrolling incorrectly unrolls it, but SCEV -; unrolling does not. +; latch block. 
define i64 @earlyLoopTest(i64* %base) nounwind { ; CHECK-LABEL: @earlyLoopTest( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ] -; CHECK-NEXT: [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ] -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 4 -; CHECK-NEXT: [[S_NEXT]] = add i64 [[S]], [[VAL]] -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[INC]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4 +; CHECK-NEXT: br label [[TAIL:%.*]] ; CHECK: tail: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]] ; CHECK: exit1: -; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ] +; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ] ; CHECK-NEXT: ret i64 [[S_LCSSA]] ; CHECK: exit2: -; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ] +; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1:%.*]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ], [ [[S_NEXT_3:%.*]], [[TAIL_3:%.*]] ] ; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]] +; CHECK: loop.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4 +; CHECK-NEXT: [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]] +; CHECK-NEXT: br label [[TAIL_1]] +; CHECK: tail.1: +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2:%.*]], label [[EXIT2]] +; CHECK: loop.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2 +; CHECK-NEXT: [[VAL_2:%.*]] = 
load i64, i64* [[ADR_2]], align 4 +; CHECK-NEXT: [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]] +; CHECK-NEXT: br label [[TAIL_2]] +; CHECK: tail.2: +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]] +; CHECK: loop.3: +; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3 +; CHECK-NEXT: [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4 +; CHECK-NEXT: [[S_NEXT_3]] = add i64 [[S_NEXT_2]], [[VAL_3]] +; CHECK-NEXT: br i1 false, label [[TAIL_3]], label [[EXIT1:%.*]] +; CHECK: tail.3: +; CHECK-NEXT: br label [[EXIT2]] ; entry: br label %loop @@ -115,18 +131,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: br i1 false, label [[L2]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L1]], label [[EXIT2:%.*]] +; CHECK-NEXT: ret i32 [[VAL]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 -; CHECK: exit2: -; CHECK-NEXT: [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], [[L2]] ] -; CHECK-NEXT: ret i32 [[VAL_LCSSA1]] ; entry: br label %l1 @@ -149,32 +159,69 @@ } -; SCEV should not unroll a multi-exit loops unless the latch block has -; a known trip count, regardless of the early exit trip counts. The -; LoopUnroll utility uses this assumption to optimize the latch -; block's branch. +; SCEV can unroll multi-exit loops even if the latch block has no +; known trip count, but an early exit has a known trip count. In this +; case we must be careful not to optimize the latch branch away. 
define i32 @multiExitIncomplete(i32* %base) nounwind { ; CHECK-LABEL: @multiExitIncomplete( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5 -; CHECK-NEXT: br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 +; CHECK-NEXT: br label [[L2:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L3]], label [[EXIT2:%.*]] +; CHECK-NEXT: br label [[L3:%.*]] ; CHECK: l3: ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 ; CHECK: exit2: ; CHECK-NEXT: ret i32 2 ; CHECK: exit3: ; CHECK-NEXT: ret i32 3 +; CHECK: l1.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4 +; CHECK-NEXT: br label [[L2_1:%.*]] +; CHECK: l2.1: +; CHECK-NEXT: br label [[L3_1:%.*]] +; CHECK: l3.1: +; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]] +; CHECK: l1.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2 +; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4 +; CHECK-NEXT: br label [[L2_2:%.*]] +; CHECK: l2.2: +; CHECK-NEXT: br label [[L3_2:%.*]] +; CHECK: l3.2: +; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]] +; CHECK: l1.3: +; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3 +; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* 
[[ADR_3]], align 4 +; CHECK-NEXT: br label [[L2_3:%.*]] +; CHECK: l2.3: +; CHECK-NEXT: br label [[L3_3:%.*]] +; CHECK: l3.3: +; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0 +; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]] +; CHECK: l1.4: +; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4 +; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4 +; CHECK-NEXT: br label [[L2_4:%.*]] +; CHECK: l2.4: +; CHECK-NEXT: br label [[L3_4:%.*]] +; CHECK: l3.4: +; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0 +; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]] +; CHECK: l1.5: +; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1:%.*]] +; CHECK: l2.5: +; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2:%.*]] +; CHECK: l3.5: +; CHECK-NEXT: br label [[EXIT3]] ; entry: br label %l1 @@ -263,17 +310,18 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[B_03]], 8 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]] +; CHECK-NEXT: br label [[FOR_COND:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: br i1 false, label [[RETURN]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 false, label [[RETURN:%.*]], label [[FOR_BODY_1:%.*]] ; CHECK: return: -; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] +; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void +; CHECK: for.body.1: +; CHECK-NEXT: br i1 false, 
label [[FOR_COND_1]], label [[RETURN]] +; CHECK: for.cond.1: +; CHECK-NEXT: br label [[RETURN]] ; entry: br label %for.body Index: llvm/test/Transforms/LoopUnroll/unloop.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unloop.ll +++ llvm/test/Transforms/LoopUnroll/unloop.ll @@ -483,7 +483,7 @@ ; CHECK: for.cond.i: ; CHECK-NEXT: br label [[FOR_COND_I]] ; CHECK: Proc2.exit: -; CHECK-NEXT: br label [[FOR_COND31]] +; CHECK-NEXT: unreachable ; CHECK: for.end94: ; CHECK-NEXT: ret void ; Index: llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll +++ llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll @@ -8,25 +8,53 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 123, [[ENTRY:%.*]] ], [ [[RES_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC9:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]] -; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[PTR]], align 2 -; CHECK-NEXT: [[RES_NEXT]] = add i16 [[RES]], [[LV]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_0]], 3 -; CHECK-NEXT: br i1 [[CMP]], label [[EXITING_1:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[RES_NEXT:%.*]] = add i16 123, [[LV]] +; CHECK-NEXT: br label [[EXITING_1:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i16 [[LV]], [[X:%.*]] -; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[EXITING_2:%.*]] +; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT:%.*]], label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: [[EC_2:%.*]] = icmp eq i16 [[LV]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[EC_2]], label 
[[EXIT]], label [[LATCH]] +; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[INC9]] = add i64 [[I_0]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1 +; CHECK-NEXT: [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2 +; CHECK-NEXT: [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]] +; CHECK-NEXT: br label [[EXITING_1_1:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ] ; CHECK-NEXT: ret i16 [[RES_LCSSA]] +; CHECK: exiting.1.1: +; CHECK-NEXT: [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]] +; CHECK: exiting.2.1: +; CHECK-NEXT: [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1:%.*]] +; CHECK: latch.1: +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2 +; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2 +; CHECK-NEXT: [[RES_NEXT_2:%.*]] = add i16 [[RES_NEXT_1]], [[LV_2]] +; CHECK-NEXT: br label [[EXITING_1_2]] +; CHECK: exiting.1.2: +; CHECK-NEXT: [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]] +; CHECK: exiting.2.2: +; CHECK-NEXT: [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3 +; CHECK-NEXT: [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2 +; CHECK-NEXT: [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], 
[[LV_3]] +; CHECK-NEXT: br i1 false, label [[EXITING_1_3]], label [[EXIT]] +; CHECK: exiting.1.3: +; CHECK-NEXT: [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]] +; CHECK: exiting.2.3: +; CHECK-NEXT: [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]] +; CHECK: latch.3: +; CHECK-NEXT: unreachable ; entry: br label %header