Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -1106,20 +1106,39 @@ return LoopUnrollResult::Unmodified; } - // Find trip count and trip multiple if count is not available + BasicBlock *Latch = L->getLoopLatch(); + assert(Latch && "Loop simplify form must have latch"); + + // Find the smallest exact constant trip count for any latch-dominating exit. + // If there is none, find the largest trip multiple instead. unsigned TripCount = 0; unsigned TripMultiple = 1; - // If there are multiple exiting blocks but one of them is the latch, use the - // latch for the trip count estimation. Otherwise insist on a single exiting - // block for the trip count estimation. - BasicBlock *ExitingBlock = L->getLoopLatch(); - if (!ExitingBlock || !L->isLoopExiting(ExitingBlock)) - ExitingBlock = L->getExitingBlock(); - if (ExitingBlock) { - TripCount = SE.getSmallConstantTripCount(L, ExitingBlock); - TripMultiple = SE.getSmallConstantTripMultiple(L, ExitingBlock); + BasicBlock *AnalyzedExit = nullptr; + SmallVector<BasicBlock *, 8> ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + for (BasicBlock *ExitingBlock : ExitingBlocks) { + // Only unroll against latch-dominating exit blocks. + if (!DT.dominates(ExitingBlock, Latch)) + continue; + + if (unsigned TC = SE.getSmallConstantTripCount(L, ExitingBlock)) { + if (!TripCount || TC < TripCount) { + TripMultiple = TripCount = TC; + AnalyzedExit = ExitingBlock; + } + } + + if (!TripCount) + if (unsigned TM = SE.getSmallConstantTripMultiple(L, ExitingBlock)) + TripMultiple = std::max(TM, TripMultiple); } + // Determine whether the trip count is for an early exit, i.e. neither the + // unique exit nor the latch exit. In this case unrolling should not optimize + // conditional branches, as it does not know which exit to optimize. 
+ bool IsEarlyExit = + AnalyzedExit && ExitingBlocks.size() != 1 && AnalyzedExit != Latch; + // If the loop contains a convergent operation, the prelude we'd add // to do the first few instructions before we hit the unrolled loop // is unsafe -- it adds a control-flow dependency to the convergent @@ -1159,13 +1178,14 @@ // Save loop properties before it is transformed. MDNode *OrigLoopID = L->getLoopID(); - // Unroll the loop. + // Unroll the loop. Preserve the latch branch for upper-bound and early-exit + // unrolling. Loop *RemainderLoop = nullptr; LoopUnrollResult UnrollResult = UnrollLoop( L, {UP.Count, TripCount, UP.Force, UP.Runtime, UP.AllowExpensiveTripCount, - UseUpperBound, MaxOrZero, TripMultiple, PP.PeelCount, UP.UnrollRemainder, - ForgetAllSCEV}, + UseUpperBound || IsEarlyExit, MaxOrZero, TripMultiple, PP.PeelCount, + UP.UnrollRemainder, ForgetAllSCEV}, LI, &SE, &DT, &AC, &TTI, &ORE, PreserveLCSSA, &RemainderLoop); if (UnrollResult == LoopUnrollResult::Unmodified) return LoopUnrollResult::Unmodified; Index: llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll +++ llvm/test/Transforms/LoopUnroll/full-unroll-non-latch-exit.ll @@ -20,20 +20,30 @@ ; CHECK-NEXT: store i64 -8661621401413125213, i64* [[A2_1]], align 8 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[START:%.*]] ], [ [[IV_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV]], 2 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 [[IV]] -; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 [[IV]] 
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 0 +; CHECK-NEXT: [[GEP2:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 0 ; CHECK-NEXT: [[LOAD1:%.*]] = load i64, i64* [[GEP1]], align 8 ; CHECK-NEXT: [[LOAD2:%.*]] = load i64, i64* [[GEP2]], align 8 ; CHECK-NEXT: [[EXITCOND2:%.*]] = icmp eq i64 [[LOAD1]], [[LOAD2]] -; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP]], label [[EXIT]] +; CHECK-NEXT: br i1 [[EXITCOND2]], label [[LOOP_1:%.*]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ] +; CHECK-NEXT: [[EXIT_VAL:%.*]] = phi i1 [ false, [[LATCH]] ], [ true, [[LOOP]] ], [ true, [[LOOP_1]] ], [ false, [[LATCH_1:%.*]] ], [ true, [[LOOP_2:%.*]] ], [ false, [[LATCH_2:%.*]] ] ; CHECK-NEXT: ret i1 [[EXIT_VAL]] +; CHECK: loop.1: +; CHECK-NEXT: br i1 false, label [[EXIT]], label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: [[GEP1_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A1]], i64 0, i64 1 +; CHECK-NEXT: [[GEP2_1:%.*]] = getelementptr inbounds [2 x i64], [2 x i64]* [[A2]], i64 0, i64 1 +; CHECK-NEXT: [[LOAD1_1:%.*]] = load i64, i64* [[GEP1_1]], align 8 +; CHECK-NEXT: [[LOAD2_1:%.*]] = load i64, i64* [[GEP2_1]], align 8 +; CHECK-NEXT: [[EXITCOND2_1:%.*]] = icmp eq i64 [[LOAD1_1]], [[LOAD2_1]] +; CHECK-NEXT: br i1 [[EXITCOND2_1]], label [[LOOP_2]], label [[EXIT]] +; CHECK: loop.2: +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[LATCH_2]] +; CHECK: latch.2: +; CHECK-NEXT: br label [[EXIT]] ; start: %a1 = alloca [2 x i64], align 8 Index: llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll +++ llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll @@ -165,43 +165,35 @@ ; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br label [[FOR_HEADER:%.*]] ; CHECK: for.header: -; CHECK-NEXT: [[TMP1:%.*]] = phi i32 [ 
[[TMP0]], [[ENTRY:%.*]] ], [ [[DOTPRE_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]] ] -; CHECK-NEXT: [[I:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC_3:%.*]], [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]] ] -; CHECK-NEXT: call void @bar(i32 [[TMP1]]) +; CHECK-NEXT: call void @bar(i32 [[TMP0]]) ; CHECK-NEXT: br i1 [[COND:%.*]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i64 [[I]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1 ; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_1:%.*]], label [[FOR_END]] ; CHECK: for.end: ; CHECK-NEXT: ret void ; CHECK: for.body.1: -; CHECK-NEXT: [[INC_1:%.*]] = add nuw nsw i64 [[INC]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.1: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_1]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_2:%.*]], label [[FOR_END]] ; CHECK: for.body.2: -; CHECK-NEXT: [[INC_2:%.*]] = add nuw nsw i64 [[INC_1]], 1 ; CHECK-NEXT: br i1 true, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.2: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_2]] +; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = 
getelementptr inbounds i32, i32* [[A]], i64 3 ; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4 ; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]]) ; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY_3:%.*]], label [[FOR_END]] ; CHECK: for.body.3: -; CHECK-NEXT: [[INC_3]] = add nuw nsw i64 [[INC_2]], 1 -; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3]], label [[FOR_END]] +; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END]] ; CHECK: for.body.for.body_crit_edge.3: -; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INC_3]] -; CHECK-NEXT: [[DOTPRE_3]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_3]], align 4 -; CHECK-NEXT: br label [[FOR_HEADER]], !llvm.loop [[LOOP2:![0-9]+]] +; CHECK-NEXT: unreachable ; entry: %0 = load i32, i32* %A, align 4 Index: llvm/test/Transforms/LoopUnroll/scevunroll.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/scevunroll.ll +++ llvm/test/Transforms/LoopUnroll/scevunroll.ll @@ -60,30 +60,46 @@ ; SCEV unrolling properly handles loops with multiple exits. In this ; case, the computed trip count based on a canonical IV is *not* for a -; latch block. Canonical unrolling incorrectly unrolls it, but SCEV -; unrolling does not. +; latch block. 
define i64 @earlyLoopTest(i64* %base) nounwind { ; CHECK-LABEL: @earlyLoopTest( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[TAIL:%.*]] ] -; CHECK-NEXT: [[S:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[S_NEXT:%.*]], [[TAIL]] ] -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i64, i64* [[BASE:%.*]], i64 [[IV]] -; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[ADR]], align 4 -; CHECK-NEXT: [[S_NEXT]] = add i64 [[S]], [[VAL]] -; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[IV]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[INC]], 4 -; CHECK-NEXT: br i1 [[CMP]], label [[TAIL]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i64, i64* [[BASE:%.*]], align 4 +; CHECK-NEXT: br i1 true, label [[TAIL:%.*]], label [[EXIT1:%.*]] ; CHECK: tail: ; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_1:%.*]], label [[EXIT2:%.*]] ; CHECK: exit1: -; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ [[S]], [[LOOP]] ] +; CHECK-NEXT: [[S_LCSSA:%.*]] = phi i64 [ 0, [[LOOP]] ], [ [[VAL]], [[LOOP_1]] ], [ [[S_NEXT_1:%.*]], [[LOOP_2:%.*]] ], [ [[S_NEXT_2:%.*]], [[LOOP_3:%.*]] ] ; CHECK-NEXT: ret i64 [[S_LCSSA]] ; CHECK: exit2: -; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[S_NEXT]], [[TAIL]] ] +; CHECK-NEXT: [[S_NEXT_LCSSA1:%.*]] = phi i64 [ [[VAL]], [[TAIL]] ], [ [[S_NEXT_1]], [[TAIL_1:%.*]] ], [ [[S_NEXT_2]], [[TAIL_2:%.*]] ], [ [[S_NEXT_3:%.*]], [[TAIL_3:%.*]] ] ; CHECK-NEXT: ret i64 [[S_NEXT_LCSSA1]] +; CHECK: loop.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i64, i64* [[BASE]], i64 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i64, i64* [[ADR_1]], align 4 +; CHECK-NEXT: [[S_NEXT_1]] = add i64 [[VAL]], [[VAL_1]] +; CHECK-NEXT: br i1 true, label [[TAIL_1]], label [[EXIT1]] +; CHECK: tail.1: +; CHECK-NEXT: [[CMP2_1:%.*]] = icmp ne i64 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP2_1]], label [[LOOP_2]], label 
[[EXIT2]] +; CHECK: loop.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i64, i64* [[BASE]], i64 2 +; CHECK-NEXT: [[VAL_2:%.*]] = load i64, i64* [[ADR_2]], align 4 +; CHECK-NEXT: [[S_NEXT_2]] = add i64 [[S_NEXT_1]], [[VAL_2]] +; CHECK-NEXT: br i1 true, label [[TAIL_2]], label [[EXIT1]] +; CHECK: tail.2: +; CHECK-NEXT: [[CMP2_2:%.*]] = icmp ne i64 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP2_2]], label [[LOOP_3]], label [[EXIT2]] +; CHECK: loop.3: +; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i64, i64* [[BASE]], i64 3 +; CHECK-NEXT: [[VAL_3:%.*]] = load i64, i64* [[ADR_3]], align 4 +; CHECK-NEXT: [[S_NEXT_3]] = add i64 [[S_NEXT_2]], [[VAL_3]] +; CHECK-NEXT: br i1 false, label [[TAIL_3]], label [[EXIT1]] +; CHECK: tail.3: +; CHECK-NEXT: br label [[EXIT2]] ; entry: br label %loop @@ -115,18 +131,12 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L2:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw nsw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: br i1 false, label [[L2]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 +; CHECK-NEXT: br i1 false, label [[L2:%.*]], label [[EXIT1:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L1]], label [[EXIT2:%.*]] +; CHECK-NEXT: ret i32 [[VAL]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 -; CHECK: exit2: -; CHECK-NEXT: [[VAL_LCSSA1:%.*]] = phi i32 [ [[VAL]], [[L2]] ] -; CHECK-NEXT: ret i32 [[VAL_LCSSA1]] ; entry: br label %l1 @@ -149,32 +159,69 @@ } -; SCEV should not unroll a multi-exit loops unless the latch block has -; a known trip count, regardless of the early exit trip counts. The -; LoopUnroll utility uses this assumption to optimize the latch -; block's branch. 
+; SCEV can unroll multi-exit loops even if the latch block has no +; known trip count, but an early exit has a known trip count. In this +; case we must be careful not to optimize the latch branch away. define i32 @multiExitIncomplete(i32* %base) nounwind { ; CHECK-LABEL: @multiExitIncomplete( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[L1:%.*]] ; CHECK: l1: -; CHECK-NEXT: [[IV1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC1:%.*]], [[L3:%.*]] ] -; CHECK-NEXT: [[INC1]] = add nuw i32 [[IV1]], 1 -; CHECK-NEXT: [[ADR:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i32 [[IV1]] -; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[ADR]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ult i32 [[IV1]], 5 -; CHECK-NEXT: br i1 [[CMP1]], label [[L2:%.*]], label [[EXIT1:%.*]] +; CHECK-NEXT: [[VAL:%.*]] = load i32, i32* [[BASE:%.*]], align 4 +; CHECK-NEXT: br i1 true, label [[L2:%.*]], label [[EXIT1:%.*]] ; CHECK: l2: -; CHECK-NEXT: br i1 true, label [[L3]], label [[EXIT2:%.*]] +; CHECK-NEXT: br i1 true, label [[L3:%.*]], label [[EXIT2:%.*]] ; CHECK: l3: ; CHECK-NEXT: [[CMP3:%.*]] = icmp ne i32 [[VAL]], 0 -; CHECK-NEXT: br i1 [[CMP3]], label [[L1]], label [[EXIT3:%.*]] +; CHECK-NEXT: br i1 [[CMP3]], label [[L1_1:%.*]], label [[EXIT3:%.*]] ; CHECK: exit1: ; CHECK-NEXT: ret i32 1 ; CHECK: exit2: ; CHECK-NEXT: ret i32 2 ; CHECK: exit3: ; CHECK-NEXT: ret i32 3 +; CHECK: l1.1: +; CHECK-NEXT: [[ADR_1:%.*]] = getelementptr i32, i32* [[BASE]], i32 1 +; CHECK-NEXT: [[VAL_1:%.*]] = load i32, i32* [[ADR_1]], align 4 +; CHECK-NEXT: br i1 true, label [[L2_1:%.*]], label [[EXIT1]] +; CHECK: l2.1: +; CHECK-NEXT: br i1 true, label [[L3_1:%.*]], label [[EXIT2]] +; CHECK: l3.1: +; CHECK-NEXT: [[CMP3_1:%.*]] = icmp ne i32 [[VAL_1]], 0 +; CHECK-NEXT: br i1 [[CMP3_1]], label [[L1_2:%.*]], label [[EXIT3]] +; CHECK: l1.2: +; CHECK-NEXT: [[ADR_2:%.*]] = getelementptr i32, i32* [[BASE]], i32 2 +; CHECK-NEXT: [[VAL_2:%.*]] = load i32, i32* [[ADR_2]], align 4 +; CHECK-NEXT: br i1 true, label [[L2_2:%.*]], label 
[[EXIT1]] +; CHECK: l2.2: +; CHECK-NEXT: br i1 true, label [[L3_2:%.*]], label [[EXIT2]] +; CHECK: l3.2: +; CHECK-NEXT: [[CMP3_2:%.*]] = icmp ne i32 [[VAL_2]], 0 +; CHECK-NEXT: br i1 [[CMP3_2]], label [[L1_3:%.*]], label [[EXIT3]] +; CHECK: l1.3: +; CHECK-NEXT: [[ADR_3:%.*]] = getelementptr i32, i32* [[BASE]], i32 3 +; CHECK-NEXT: [[VAL_3:%.*]] = load i32, i32* [[ADR_3]], align 4 +; CHECK-NEXT: br i1 true, label [[L2_3:%.*]], label [[EXIT1]] +; CHECK: l2.3: +; CHECK-NEXT: br i1 true, label [[L3_3:%.*]], label [[EXIT2]] +; CHECK: l3.3: +; CHECK-NEXT: [[CMP3_3:%.*]] = icmp ne i32 [[VAL_3]], 0 +; CHECK-NEXT: br i1 [[CMP3_3]], label [[L1_4:%.*]], label [[EXIT3]] +; CHECK: l1.4: +; CHECK-NEXT: [[ADR_4:%.*]] = getelementptr i32, i32* [[BASE]], i32 4 +; CHECK-NEXT: [[VAL_4:%.*]] = load i32, i32* [[ADR_4]], align 4 +; CHECK-NEXT: br i1 true, label [[L2_4:%.*]], label [[EXIT1]] +; CHECK: l2.4: +; CHECK-NEXT: br i1 true, label [[L3_4:%.*]], label [[EXIT2]] +; CHECK: l3.4: +; CHECK-NEXT: [[CMP3_4:%.*]] = icmp ne i32 [[VAL_4]], 0 +; CHECK-NEXT: br i1 [[CMP3_4]], label [[L1_5:%.*]], label [[EXIT3]] +; CHECK: l1.5: +; CHECK-NEXT: br i1 false, label [[L2_5:%.*]], label [[EXIT1]] +; CHECK: l2.5: +; CHECK-NEXT: br i1 true, label [[L3_5:%.*]], label [[EXIT2]] +; CHECK: l3.5: +; CHECK-NEXT: br label [[EXIT3]] ; entry: br label %l1 @@ -263,17 +310,18 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[B_03:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_COND:%.*]] ] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[B_03]], 0 -; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[B_03]], 8 -; CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_COND]], label [[RETURN:%.*]] +; CHECK-NEXT: br i1 true, label [[FOR_COND:%.*]], label [[RETURN:%.*]] ; CHECK: for.cond: -; CHECK-NEXT: br i1 false, label [[RETURN]], label [[FOR_BODY]] +; CHECK-NEXT: br i1 false, label [[RETURN]], label [[FOR_BODY_1:%.*]] ; CHECK: return: -; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi 
i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] -; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ] +; CHECK-NEXT: [[B_03_LCSSA:%.*]] = phi i32 [ 8, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ], [ 8, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1:%.*]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 1, [[FOR_BODY]] ], [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY_1]] ], [ 0, [[FOR_COND_1]] ] ; CHECK-NEXT: store i32 [[B_03_LCSSA]], i32* [[A:%.*]], align 4 ; CHECK-NEXT: ret void +; CHECK: for.body.1: +; CHECK-NEXT: br i1 false, label [[FOR_COND_1]], label [[RETURN]] +; CHECK: for.cond.1: +; CHECK-NEXT: br label [[RETURN]] ; entry: br label %for.body Index: llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll +++ llvm/test/Transforms/LoopUnroll/unroll-header-exiting-with-phis-multiple-exiting-blocks.ll @@ -8,25 +8,53 @@ ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[RES:%.*]] = phi i16 [ 123, [[ENTRY:%.*]] ], [ [[RES_NEXT:%.*]], [[LATCH:%.*]] ] -; CHECK-NEXT: [[I_0:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INC9:%.*]], [[LATCH]] ] -; CHECK-NEXT: [[PTR:%.*]] = getelementptr inbounds i16, i16* [[A:%.*]], i64 [[I_0]] -; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[PTR]], align 2 -; CHECK-NEXT: [[RES_NEXT]] = add i16 [[RES]], [[LV]] -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[I_0]], 3 -; CHECK-NEXT: br i1 [[CMP]], label [[EXITING_1:%.*]], label [[EXIT:%.*]] +; CHECK-NEXT: [[LV:%.*]] = load i16, i16* [[A:%.*]], align 2 +; CHECK-NEXT: [[RES_NEXT:%.*]] = add i16 123, [[LV]] +; CHECK-NEXT: br i1 true, label [[EXITING_1:%.*]], label [[EXIT:%.*]] ; CHECK: exiting.1: ; CHECK-NEXT: [[EC_1:%.*]] = icmp eq i16 [[LV]], [[X:%.*]] ; CHECK-NEXT: br i1 [[EC_1]], label [[EXIT]], label [[EXITING_2:%.*]] ; CHECK: exiting.2: ; CHECK-NEXT: [[EC_2:%.*]] = icmp 
eq i16 [[LV]], [[Y:%.*]] -; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH]] +; CHECK-NEXT: br i1 [[EC_2]], label [[EXIT]], label [[LATCH:%.*]] ; CHECK: latch: -; CHECK-NEXT: [[INC9]] = add i64 [[I_0]], 1 -; CHECK-NEXT: br label [[HEADER]] +; CHECK-NEXT: [[PTR_1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 1 +; CHECK-NEXT: [[LV_1:%.*]] = load i16, i16* [[PTR_1]], align 2 +; CHECK-NEXT: [[RES_NEXT_1:%.*]] = add i16 [[RES_NEXT]], [[LV_1]] +; CHECK-NEXT: br i1 true, label [[EXITING_1_1:%.*]], label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ] +; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i16 [ [[RES_NEXT]], [[HEADER]] ], [ 0, [[EXITING_1]] ], [ 1, [[EXITING_2]] ], [ [[RES_NEXT_1]], [[LATCH]] ], [ 0, [[EXITING_1_1]] ], [ 1, [[EXITING_2_1:%.*]] ], [ [[RES_NEXT_2:%.*]], [[LATCH_1:%.*]] ], [ 0, [[EXITING_1_2:%.*]] ], [ 1, [[EXITING_2_2:%.*]] ], [ [[RES_NEXT_3:%.*]], [[LATCH_2:%.*]] ], [ 0, [[EXITING_1_3:%.*]] ], [ 1, [[EXITING_2_3:%.*]] ] ; CHECK-NEXT: ret i16 [[RES_LCSSA]] +; CHECK: exiting.1.1: +; CHECK-NEXT: [[EC_1_1:%.*]] = icmp eq i16 [[LV_1]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_1]], label [[EXIT]], label [[EXITING_2_1]] +; CHECK: exiting.2.1: +; CHECK-NEXT: [[EC_2_1:%.*]] = icmp eq i16 [[LV_1]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_1]], label [[EXIT]], label [[LATCH_1]] +; CHECK: latch.1: +; CHECK-NEXT: [[PTR_2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 2 +; CHECK-NEXT: [[LV_2:%.*]] = load i16, i16* [[PTR_2]], align 2 +; CHECK-NEXT: [[RES_NEXT_2]] = add i16 [[RES_NEXT_1]], [[LV_2]] +; CHECK-NEXT: br i1 true, label [[EXITING_1_2]], label [[EXIT]] +; CHECK: exiting.1.2: +; CHECK-NEXT: [[EC_1_2:%.*]] = icmp eq i16 [[LV_2]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_2]], label [[EXIT]], label [[EXITING_2_2]] +; CHECK: exiting.2.2: +; CHECK-NEXT: [[EC_2_2:%.*]] = icmp eq i16 [[LV_2]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_2]], label [[EXIT]], label [[LATCH_2]] +; CHECK: 
latch.2: +; CHECK-NEXT: [[PTR_3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 3 +; CHECK-NEXT: [[LV_3:%.*]] = load i16, i16* [[PTR_3]], align 2 +; CHECK-NEXT: [[RES_NEXT_3]] = add i16 [[RES_NEXT_2]], [[LV_3]] +; CHECK-NEXT: br i1 false, label [[EXITING_1_3]], label [[EXIT]] +; CHECK: exiting.1.3: +; CHECK-NEXT: [[EC_1_3:%.*]] = icmp eq i16 [[LV_3]], [[X]] +; CHECK-NEXT: br i1 [[EC_1_3]], label [[EXIT]], label [[EXITING_2_3]] +; CHECK: exiting.2.3: +; CHECK-NEXT: [[EC_2_3:%.*]] = icmp eq i16 [[LV_3]], [[Y]] +; CHECK-NEXT: br i1 [[EC_2_3]], label [[EXIT]], label [[LATCH_3:%.*]] +; CHECK: latch.3: +; CHECK-NEXT: unreachable ; entry: br label %header