Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -52,9 +52,6 @@ "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, cl::desc("Allow runtime unrolling for loops with multiple exits, when " "epilog is generated")); -static cl::opt UnrollRuntimeOtherExitPredictable( - "unroll-runtime-other-exit-predictable", cl::init(false), cl::Hidden, - cl::desc("Assume the non latch exit block to be predictable")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -411,8 +408,7 @@ return NewLoop; } -/// Returns true if we can profitably unroll the multi-exit loop L. Currently, -/// we return true only if UnrollRuntimeMultiExit is set to true. +/// Returns true if we can profitably unroll the multi-exit loop L. static bool canProfitablyUnrollMultiExitLoop( Loop *L, SmallVectorImpl &OtherExits, BasicBlock *LatchExit, bool UseEpilogRemainder) { @@ -421,43 +417,16 @@ if (UnrollRuntimeMultiExit.getNumOccurrences()) return UnrollRuntimeMultiExit; - // The main pain point with multi-exit loop unrolling is that once unrolled, - // we will not be able to merge all blocks into a straight line code. - // There are branches within the unrolled loop that go to the OtherExits. - // The second point is the increase in code size, but this is true - // irrespective of multiple exits. - - // Note: Both the heuristics below are coarse grained. We are essentially - // enabling unrolling of loops that have a single side exit other than the - // normal LatchExit (i.e. exiting into a deoptimize block). - // The heuristics considered are: - // 1. low number of branches in the unrolled version. - // 2. high predictability of these extra branches. - // We avoid unrolling loops that have more than two exiting blocks. 
This - // limits the total number of branches in the unrolled loop to be atmost - // the unroll factor (since one of the exiting blocks is the latch block). - SmallVector ExitingBlocks; - L->getExitingBlocks(ExitingBlocks); - if (ExitingBlocks.size() > 2) - return false; - - // Allow unrolling of loops with no non latch exit blocks. - if (OtherExits.size() == 0) - return true; - - // The second heuristic is that L has one exit other than the latchexit and - // that exit is a deoptimize block. We know that deoptimize blocks are rarely - // taken, which also implies the branch leading to the deoptimize block is - // highly predictable. When UnrollRuntimeOtherExitPredictable is specified, we - // assume the other exit branch is predictable even if it has no deoptimize - // call. - return (OtherExits.size() == 1 && - (UnrollRuntimeOtherExitPredictable || - OtherExits[0]->getTerminatingDeoptimizeCall())); - // TODO: These can be fine-tuned further to consider code size or deopt states - // that are captured by the deoptimize exit block. - // Also, we can extend this to support more cases, if we actually - // know of kinds of multiexit loops that would benefit from unrolling. + // There are two costs to unrolling: static and dynamic. + // * In static cost, a multiple exit loop is analogous to a single exit + // loop with the same instruction count. If we'd unroll the single + // exit form, we should also unroll the multiple exit one. + // * In dynamic cost, a multiple exit loop is analogous to a single + // exit loop with internal control flow. (e.g. imagine a C loop where + // every break is replaced with a continue.) If we'd unroll the internal + // control flow form, we should also unroll the multiple exit form for + // consistency's sake. 
+ return true; } // Assign the maximum possible trip count as the back edge weight for the Index: llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll +++ llvm/test/Transforms/LoopUnroll/ARM/multi-blocks.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S < %s -o - | FileCheck %s -; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S < %s -o - | FileCheck %s +; RUN: opt -mtriple=thumbv8m.main -mcpu=cortex-m33 -loop-unroll -S < %s -o - | FileCheck --check-prefixes=CHECK,CHECK-M33 %s +; RUN: opt -mtriple=thumbv7em -mcpu=cortex-m7 -loop-unroll -S < %s -o - | FileCheck --check-prefixes=CHECK,CHECK-M7 %s define void @test_three_blocks(i32* nocapture %Output, ; @@ -184,67 +184,131 @@ ; CHECK-NEXT: [[CMP14:%.*]] = icmp eq i32 [[MAXJ:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP14]], label [[CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]] ; CHECK: for.body.preheader: +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[MAXJ]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i32 [[MAXJ]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3 +; CHECK-NEXT: br i1 [[TMP1]], label [[CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK: for.body.preheader.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[MAXJ]], [[XTRAITER]] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[J_016:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[INC_3:%.*]], [[IF_END_3:%.*]] ] -; CHECK-NEXT: [[TEMP_015:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_0_ADD_3:%.*]], [[IF_END_3]] ] +; CHECK-NEXT: [[J_016:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[IF_END_3:%.*]] ] +; CHECK-NEXT: [[TEMP_015:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[TEMP_0_ADD_3:%.*]], [[IF_END_3]] ] +; CHECK-NEXT: 
[[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[IF_END_3]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_016]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[TMP0]], 65535 -; CHECK-NEXT: br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[TMP2]], 65535 +; CHECK-NEXT: br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_016]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP1]], 0 -; CHECK-NEXT: [[ADD:%.*]] = select i1 [[TOBOOL]], i32 0, i32 [[TMP0]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP3]], 0 +; CHECK-NEXT: [[ADD:%.*]] = select i1 [[TOBOOL]], i32 0, i32 [[TMP2]] ; CHECK-NEXT: [[TEMP_0_ADD:%.*]] = add i32 [[ADD]], [[TEMP_015]] ; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[J_016]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY_1:%.*]], label [[CLEANUP_LOOPEXIT]] -; CHECK: for.body.1: +; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ugt i32 [[TMP2]], 65535 -; CHECK-NEXT: br i1 [[CMP1_1]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_1:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[CMP1_1:%.*]] = icmp ugt i32 [[TMP4]], 65535 +; CHECK-NEXT: br i1 [[CMP1_1]], label 
[[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_1:%.*]] ; CHECK: if.end.1: ; CHECK-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]] -; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4 -; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP3]], 0 -; CHECK-NEXT: [[ADD_1:%.*]] = select i1 [[TOBOOL_1]], i32 0, i32 [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4 +; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[TMP5]], 0 +; CHECK-NEXT: [[ADD_1:%.*]] = select i1 [[TOBOOL_1]], i32 0, i32 [[TMP4]] ; CHECK-NEXT: [[TEMP_0_ADD_1:%.*]] = add i32 [[ADD_1]], [[TEMP_0_ADD]] ; CHECK-NEXT: [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1 -; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[INC_1]], [[MAXJ]] -; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_BODY_2:%.*]], label [[CLEANUP_LOOPEXIT]] -; CHECK: for.body.2: +; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ugt i32 [[TMP4]], 65535 -; CHECK-NEXT: br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_2:%.*]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[CMP1_2:%.*]] = icmp ugt i32 [[TMP6]], 65535 +; CHECK-NEXT: br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_2:%.*]] ; CHECK: if.end.2: ; CHECK-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]] -; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4 -; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP5]], 0 -; CHECK-NEXT: [[ADD_2:%.*]] = select i1 [[TOBOOL_2]], i32 0, i32 [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4 +; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[TMP7]], 0 +; CHECK-NEXT: [[ADD_2:%.*]] = select i1 
[[TOBOOL_2]], i32 0, i32 [[TMP6]] ; CHECK-NEXT: [[TEMP_0_ADD_2:%.*]] = add i32 [[ADD_2]], [[TEMP_0_ADD_1]] ; CHECK-NEXT: [[INC_2:%.*]] = add nuw nsw i32 [[INC_1]], 1 -; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[INC_2]], [[MAXJ]] -; CHECK-NEXT: br i1 [[CMP_2]], label [[FOR_BODY_3:%.*]], label [[CLEANUP_LOOPEXIT]] -; CHECK: for.body.3: +; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ugt i32 [[TMP6]], 65535 -; CHECK-NEXT: br i1 [[CMP1_3]], label [[CLEANUP_LOOPEXIT]], label [[IF_END_3]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[CMP1_3:%.*]] = icmp ugt i32 [[TMP8]], 65535 +; CHECK-NEXT: br i1 [[CMP1_3]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_3]] ; CHECK: if.end.3: ; CHECK-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]] -; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4 -; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[TMP7]], 0 -; CHECK-NEXT: [[ADD_3:%.*]] = select i1 [[TOBOOL_3]], i32 0, i32 [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4 +; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[TMP9]], 0 +; CHECK-NEXT: [[ADD_3:%.*]] = select i1 [[TOBOOL_3]], i32 0, i32 [[TMP8]] ; CHECK-NEXT: [[TEMP_0_ADD_3]] = add i32 [[ADD_3]], [[TEMP_0_ADD_2]] ; CHECK-NEXT: [[INC_3]] = add nuw i32 [[INC_2]], 1 -; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i32 [[INC_3]], [[MAXJ]] -; CHECK-NEXT: br i1 [[CMP_3]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]] +; CHECK-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1 +; CHECK-NEXT: [[NITER_NCMP_3:%.*]] = icmp ne i32 [[NITER_NEXT_3]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] +; CHECK: 
cleanup.loopexit.unr-lcssa.loopexit: +; CHECK-NEXT: [[TEMP_0_LCSSA_PH_PH_PH:%.*]] = phi i32 [ [[TEMP_0_ADD_3]], [[IF_END_3]] ] +; CHECK-NEXT: [[J_016_UNR_PH:%.*]] = phi i32 [ [[INC_3]], [[IF_END_3]] ] +; CHECK-NEXT: [[TEMP_015_UNR_PH:%.*]] = phi i32 [ [[TEMP_0_ADD_3]], [[IF_END_3]] ] +; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT_UNR_LCSSA]] +; CHECK: cleanup.loopexit.unr-lcssa: +; CHECK-NEXT: [[TEMP_0_LCSSA_PH_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_0_LCSSA_PH_PH_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[J_016_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[J_016_UNR_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[TEMP_015_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_015_UNR_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[CLEANUP_LOOPEXIT:%.*]] +; CHECK: for.body.epil.preheader: +; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]] +; CHECK: for.body.epil: +; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_016_UNR]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-NEXT: [[CMP1_EPIL:%.*]] = icmp ugt i32 [[TMP10]], 65535 +; CHECK-NEXT: br i1 [[CMP1_EPIL]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2:%.*]], label [[IF_END_EPIL:%.*]] +; CHECK: if.end.epil: +; CHECK-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_016_UNR]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4 +; CHECK-NEXT: [[TOBOOL_EPIL:%.*]] = icmp eq i32 [[TMP11]], 0 +; CHECK-NEXT: [[ADD_EPIL:%.*]] = select i1 [[TOBOOL_EPIL]], i32 0, i32 [[TMP10]] +; CHECK-NEXT: [[TEMP_0_ADD_EPIL:%.*]] = add i32 [[ADD_EPIL]], [[TEMP_015_UNR]] +; CHECK-NEXT: [[INC_EPIL:%.*]] = add nuw i32 [[J_016_UNR]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] 
+; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK: for.body.epil.1: +; CHECK-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4 +; CHECK-NEXT: [[CMP1_EPIL_1:%.*]] = icmp ugt i32 [[TMP12]], 65535 +; CHECK-NEXT: br i1 [[CMP1_EPIL_1]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]], label [[IF_END_EPIL_1:%.*]] +; CHECK: if.end.epil.1: +; CHECK-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]] +; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4 +; CHECK-NEXT: [[TOBOOL_EPIL_1:%.*]] = icmp eq i32 [[TMP13]], 0 +; CHECK-NEXT: [[ADD_EPIL_1:%.*]] = select i1 [[TOBOOL_EPIL_1]], i32 0, i32 [[TMP12]] +; CHECK-NEXT: [[TEMP_0_ADD_EPIL_1:%.*]] = add i32 [[ADD_EPIL_1]], [[TEMP_0_ADD_EPIL]] +; CHECK-NEXT: [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK: for.body.epil.2: +; CHECK-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]] +; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4 +; CHECK-NEXT: [[CMP1_EPIL_2:%.*]] = icmp ugt i32 [[TMP14]], 65535 +; CHECK-NEXT: br i1 [[CMP1_EPIL_2]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]], label [[IF_END_EPIL_2:%.*]] +; CHECK: if.end.epil.2: +; CHECK-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]] +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4 +; CHECK-NEXT: [[TOBOOL_EPIL_2:%.*]] = icmp eq i32 [[TMP15]], 0 +; CHECK-NEXT: [[ADD_EPIL_2:%.*]] = select i1 [[TOBOOL_EPIL_2]], i32 0, i32 [[TMP14]] +; CHECK-NEXT: 
[[TEMP_0_ADD_EPIL_2:%.*]] = add i32 [[ADD_EPIL_2]], [[TEMP_0_ADD_EPIL_1]] +; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK: cleanup.loopexit.epilog-lcssa.loopexit: +; CHECK-NEXT: [[TEMP_0_LCSSA_PH_PH1_PH:%.*]] = phi i32 [ [[TEMP_015]], [[FOR_BODY]] ], [ [[TEMP_0_ADD]], [[IF_END]] ], [ [[TEMP_0_ADD_1]], [[IF_END_1]] ], [ [[TEMP_0_ADD_2]], [[IF_END_2]] ] +; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]] +; CHECK: cleanup.loopexit.epilog-lcssa.loopexit2: +; CHECK-NEXT: [[TEMP_0_LCSSA_PH_PH1_PH3:%.*]] = phi i32 [ [[TEMP_015_UNR]], [[FOR_BODY_EPIL]] ], [ [[TEMP_0_ADD_EPIL]], [[IF_END_EPIL]] ], [ [[TEMP_0_ADD_EPIL]], [[FOR_BODY_EPIL_1]] ], [ [[TEMP_0_ADD_EPIL_1]], [[IF_END_EPIL_1]] ], [ [[TEMP_0_ADD_EPIL_1]], [[FOR_BODY_EPIL_2]] ], [ [[TEMP_0_ADD_EPIL_2]], [[IF_END_EPIL_2]] ] +; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA]] +; CHECK: cleanup.loopexit.epilog-lcssa: +; CHECK-NEXT: [[TEMP_0_LCSSA_PH_PH1:%.*]] = phi i32 [ [[TEMP_0_LCSSA_PH_PH1_PH]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]] ], [ [[TEMP_0_LCSSA_PH_PH1_PH3]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] ] +; CHECK-NEXT: br label [[CLEANUP_LOOPEXIT]] ; CHECK: cleanup.loopexit: -; CHECK-NEXT: [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_ADD]], [[IF_END]] ], [ [[TEMP_015]], [[FOR_BODY]] ], [ [[TEMP_0_ADD]], [[FOR_BODY_1]] ], [ [[TEMP_0_ADD_1]], [[IF_END_1]] ], [ [[TEMP_0_ADD_1]], [[FOR_BODY_2]] ], [ [[TEMP_0_ADD_2]], [[IF_END_2]] ], [ [[TEMP_0_ADD_2]], [[FOR_BODY_3]] ], [ [[TEMP_0_ADD_3]], [[IF_END_3]] ] +; CHECK-NEXT: [[TEMP_0_LCSSA_PH:%.*]] = phi i32 [ [[TEMP_0_LCSSA_PH_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_0_LCSSA_PH_PH1]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ] ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: ; CHECK-NEXT: [[TEMP_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_0_LCSSA_PH]], [[CLEANUP_LOOPEXIT]] ] @@ -623,48 +687,271 @@ define void @test_five_blocks(i32* nocapture %Output, ; -; CHECK-LABEL: @test_five_blocks( -; CHECK-NEXT: 
entry: -; CHECK-NEXT: [[CMP24:%.*]] = icmp ugt i32 [[MAXJ:%.*]], 1 -; CHECK-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[CLEANUP:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[J_026:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[TEMP_025:%.*]] = phi i32 [ [[TEMP_1:%.*]], [[FOR_INC]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_026]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TEMP_025]] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[ADD]], 16777215 -; CHECK-NEXT: br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]] -; CHECK: if.end: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_026]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[TMP1]], 65535 -; CHECK-NEXT: br i1 [[CMP3]], label [[IF_THEN4:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then4: -; CHECK-NEXT: [[SUB:%.*]] = add i32 [[J_026]], -1 -; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB]] -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 -; CHECK-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP0]], [[TMP2]] -; CHECK-NEXT: [[COND:%.*]] = zext i1 [[CMP7]] to i32 -; CHECK-NEXT: [[ADD8:%.*]] = add i32 [[ADD]], [[COND]] -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: if.else: -; CHECK-NEXT: [[AND:%.*]] = and i32 [[ADD]], [[TMP0]] -; CHECK-NEXT: br label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[TEMP_1]] = phi i32 [ [[ADD8]], [[IF_THEN4]] ], [ [[AND]], [[IF_ELSE]] ] -; CHECK-NEXT: [[INC]] = add nuw i32 [[J_026]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]] -; CHECK: 
cleanup.loopexit: -; CHECK-NEXT: [[TEMP_2_PH:%.*]] = phi i32 [ [[TEMP_1]], [[FOR_INC]] ], [ [[ADD]], [[FOR_BODY]] ] -; CHECK-NEXT: br label [[CLEANUP]] -; CHECK: cleanup: -; CHECK-NEXT: [[TEMP_2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_2_PH]], [[CLEANUP_LOOPEXIT]] ] -; CHECK-NEXT: store i32 [[TEMP_2]], i32* [[OUTPUT:%.*]], align 4 -; CHECK-NEXT: ret void +; CHECK-M33-LABEL: @test_five_blocks( +; CHECK-M33-NEXT: entry: +; CHECK-M33-NEXT: [[CMP24:%.*]] = icmp ugt i32 [[MAXJ:%.*]], 1 +; CHECK-M33-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[CLEANUP:%.*]] +; CHECK-M33: for.body.preheader: +; CHECK-M33-NEXT: [[TMP0:%.*]] = add i32 [[MAXJ]], -1 +; CHECK-M33-NEXT: [[TMP1:%.*]] = add i32 [[MAXJ]], -2 +; CHECK-M33-NEXT: [[XTRAITER:%.*]] = and i32 [[TMP0]], 3 +; CHECK-M33-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3 +; CHECK-M33-NEXT: br i1 [[TMP2]], label [[CLEANUP_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK-M33: for.body.preheader.new: +; CHECK-M33-NEXT: [[UNROLL_ITER:%.*]] = sub i32 [[TMP0]], [[XTRAITER]] +; CHECK-M33-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-M33: for.body: +; CHECK-M33-NEXT: [[J_026:%.*]] = phi i32 [ 1, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INC_3:%.*]], [[FOR_INC_3:%.*]] ] +; CHECK-M33-NEXT: [[TEMP_025:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[TEMP_1_3:%.*]], [[FOR_INC_3]] ] +; CHECK-M33-NEXT: [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_INC_3]] ] +; CHECK-M33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_026]] +; CHECK-M33-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-M33-NEXT: [[ADD:%.*]] = add i32 [[TMP3]], [[TEMP_025]] +; CHECK-M33-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[ADD]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT:%.*]], label [[IF_END:%.*]] +; CHECK-M33: if.end: +; CHECK-M33-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* 
[[CONDITION:%.*]], i32 [[J_026]] +; CHECK-M33-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-M33-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[TMP4]], 65535 +; CHECK-M33-NEXT: br i1 [[CMP3]], label [[IF_THEN4:%.*]], label [[IF_ELSE:%.*]] +; CHECK-M33: if.then4: +; CHECK-M33-NEXT: [[SUB:%.*]] = add nsw i32 [[J_026]], -1 +; CHECK-M33-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB]] +; CHECK-M33-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-M33-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP3]], [[TMP5]] +; CHECK-M33-NEXT: [[COND:%.*]] = zext i1 [[CMP7]] to i32 +; CHECK-M33-NEXT: [[ADD8:%.*]] = add i32 [[ADD]], [[COND]] +; CHECK-M33-NEXT: br label [[FOR_INC:%.*]] +; CHECK-M33: if.else: +; CHECK-M33-NEXT: [[AND:%.*]] = and i32 [[ADD]], [[TMP3]] +; CHECK-M33-NEXT: br label [[FOR_INC]] +; CHECK-M33: for.inc: +; CHECK-M33-NEXT: [[TEMP_1:%.*]] = phi i32 [ [[ADD8]], [[IF_THEN4]] ], [ [[AND]], [[IF_ELSE]] ] +; CHECK-M33-NEXT: [[INC:%.*]] = add nuw nsw i32 [[J_026]], 1 +; CHECK-M33-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i32 [[NITER]], 1 +; CHECK-M33-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]] +; CHECK-M33-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-M33-NEXT: [[ADD_1:%.*]] = add i32 [[TMP6]], [[TEMP_1]] +; CHECK-M33-NEXT: [[CMP1_1:%.*]] = icmp ugt i32 [[ADD_1]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_1]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_1:%.*]] +; CHECK-M33: if.end.1: +; CHECK-M33-NEXT: [[ARRAYIDX2_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC]] +; CHECK-M33-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX2_1]], align 4 +; CHECK-M33-NEXT: [[CMP3_1:%.*]] = icmp ugt i32 [[TMP7]], 65535 +; CHECK-M33-NEXT: br i1 [[CMP3_1]], label [[IF_THEN4_1:%.*]], label [[IF_ELSE_1:%.*]] +; CHECK-M33: if.else.1: +; CHECK-M33-NEXT: [[AND_1:%.*]] = and i32 [[ADD_1]], [[TMP6]] +; CHECK-M33-NEXT: br label [[FOR_INC_1:%.*]] +; 
CHECK-M33: if.then4.1: +; CHECK-M33-NEXT: [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_026]] +; CHECK-M33-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX6_1]], align 4 +; CHECK-M33-NEXT: [[CMP7_1:%.*]] = icmp ugt i32 [[TMP6]], [[TMP8]] +; CHECK-M33-NEXT: [[COND_1:%.*]] = zext i1 [[CMP7_1]] to i32 +; CHECK-M33-NEXT: [[ADD8_1:%.*]] = add i32 [[ADD_1]], [[COND_1]] +; CHECK-M33-NEXT: br label [[FOR_INC_1]] +; CHECK-M33: for.inc.1: +; CHECK-M33-NEXT: [[TEMP_1_1:%.*]] = phi i32 [ [[ADD8_1]], [[IF_THEN4_1]] ], [ [[AND_1]], [[IF_ELSE_1]] ] +; CHECK-M33-NEXT: [[INC_1:%.*]] = add nuw nsw i32 [[INC]], 1 +; CHECK-M33-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i32 [[NITER_NEXT]], 1 +; CHECK-M33-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]] +; CHECK-M33-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-M33-NEXT: [[ADD_2:%.*]] = add i32 [[TMP9]], [[TEMP_1_1]] +; CHECK-M33-NEXT: [[CMP1_2:%.*]] = icmp ugt i32 [[ADD_2]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_2]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_2:%.*]] +; CHECK-M33: if.end.2: +; CHECK-M33-NEXT: [[ARRAYIDX2_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_1]] +; CHECK-M33-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX2_2]], align 4 +; CHECK-M33-NEXT: [[CMP3_2:%.*]] = icmp ugt i32 [[TMP10]], 65535 +; CHECK-M33-NEXT: br i1 [[CMP3_2]], label [[IF_THEN4_2:%.*]], label [[IF_ELSE_2:%.*]] +; CHECK-M33: if.else.2: +; CHECK-M33-NEXT: [[AND_2:%.*]] = and i32 [[ADD_2]], [[TMP9]] +; CHECK-M33-NEXT: br label [[FOR_INC_2:%.*]] +; CHECK-M33: if.then4.2: +; CHECK-M33-NEXT: [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC]] +; CHECK-M33-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX6_2]], align 4 +; CHECK-M33-NEXT: [[CMP7_2:%.*]] = icmp ugt i32 [[TMP9]], [[TMP11]] +; CHECK-M33-NEXT: [[COND_2:%.*]] = zext i1 [[CMP7_2]] to i32 +; CHECK-M33-NEXT: [[ADD8_2:%.*]] = add i32 [[ADD_2]], 
[[COND_2]] +; CHECK-M33-NEXT: br label [[FOR_INC_2]] +; CHECK-M33: for.inc.2: +; CHECK-M33-NEXT: [[TEMP_1_2:%.*]] = phi i32 [ [[ADD8_2]], [[IF_THEN4_2]] ], [ [[AND_2]], [[IF_ELSE_2]] ] +; CHECK-M33-NEXT: [[INC_2:%.*]] = add nuw i32 [[INC_1]], 1 +; CHECK-M33-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i32 [[NITER_NEXT_1]], 1 +; CHECK-M33-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_2]] +; CHECK-M33-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-M33-NEXT: [[ADD_3:%.*]] = add i32 [[TMP12]], [[TEMP_1_2]] +; CHECK-M33-NEXT: [[CMP1_3:%.*]] = icmp ugt i32 [[ADD_3]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_3]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]], label [[IF_END_3:%.*]] +; CHECK-M33: if.end.3: +; CHECK-M33-NEXT: [[ARRAYIDX2_3:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_2]] +; CHECK-M33-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX2_3]], align 4 +; CHECK-M33-NEXT: [[CMP3_3:%.*]] = icmp ugt i32 [[TMP13]], 65535 +; CHECK-M33-NEXT: br i1 [[CMP3_3]], label [[IF_THEN4_3:%.*]], label [[IF_ELSE_3:%.*]] +; CHECK-M33: if.else.3: +; CHECK-M33-NEXT: [[AND_3:%.*]] = and i32 [[ADD_3]], [[TMP12]] +; CHECK-M33-NEXT: br label [[FOR_INC_3]] +; CHECK-M33: if.then4.3: +; CHECK-M33-NEXT: [[ARRAYIDX6_3:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_1]] +; CHECK-M33-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX6_3]], align 4 +; CHECK-M33-NEXT: [[CMP7_3:%.*]] = icmp ugt i32 [[TMP12]], [[TMP14]] +; CHECK-M33-NEXT: [[COND_3:%.*]] = zext i1 [[CMP7_3]] to i32 +; CHECK-M33-NEXT: [[ADD8_3:%.*]] = add i32 [[ADD_3]], [[COND_3]] +; CHECK-M33-NEXT: br label [[FOR_INC_3]] +; CHECK-M33: for.inc.3: +; CHECK-M33-NEXT: [[TEMP_1_3]] = phi i32 [ [[ADD8_3]], [[IF_THEN4_3]] ], [ [[AND_3]], [[IF_ELSE_3]] ] +; CHECK-M33-NEXT: [[INC_3]] = add nuw nsw i32 [[INC_2]], 1 +; CHECK-M33-NEXT: [[NITER_NEXT_3]] = add i32 [[NITER_NEXT_2]], 1 +; CHECK-M33-NEXT: [[NITER_NCMP_3:%.*]] = icmp ne i32 [[NITER_NEXT_3]], 
[[UNROLL_ITER]] +; CHECK-M33-NEXT: br i1 [[NITER_NCMP_3]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]] +; CHECK-M33: cleanup.loopexit.unr-lcssa.loopexit: +; CHECK-M33-NEXT: [[TEMP_2_PH_PH_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ] +; CHECK-M33-NEXT: [[J_026_UNR_PH:%.*]] = phi i32 [ [[INC_3]], [[FOR_INC_3]] ] +; CHECK-M33-NEXT: [[TEMP_025_UNR_PH:%.*]] = phi i32 [ [[TEMP_1_3]], [[FOR_INC_3]] ] +; CHECK-M33-NEXT: br label [[CLEANUP_LOOPEXIT_UNR_LCSSA]] +; CHECK-M33: cleanup.loopexit.unr-lcssa: +; CHECK-M33-NEXT: [[TEMP_2_PH_PH:%.*]] = phi i32 [ undef, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_2_PH_PH_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-M33-NEXT: [[J_026_UNR:%.*]] = phi i32 [ 1, [[FOR_BODY_PREHEADER]] ], [ [[J_026_UNR_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-M33-NEXT: [[TEMP_025_UNR:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TEMP_025_UNR_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-M33-NEXT: [[LCMP_MOD:%.*]] = icmp ne i32 [[XTRAITER]], 0 +; CHECK-M33-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_EPIL_PREHEADER:%.*]], label [[CLEANUP_LOOPEXIT:%.*]] +; CHECK-M33: for.body.epil.preheader: +; CHECK-M33-NEXT: br label [[FOR_BODY_EPIL:%.*]] +; CHECK-M33: for.body.epil: +; CHECK-M33-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_026_UNR]] +; CHECK-M33-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-M33-NEXT: [[ADD_EPIL:%.*]] = add i32 [[TMP15]], [[TEMP_025_UNR]] +; CHECK-M33-NEXT: [[CMP1_EPIL:%.*]] = icmp ugt i32 [[ADD_EPIL]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_EPIL]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2:%.*]], label [[IF_END_EPIL:%.*]] +; CHECK-M33: if.end.epil: +; CHECK-M33-NEXT: [[ARRAYIDX2_EPIL:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[J_026_UNR]] +; CHECK-M33-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL]], align 4 +; CHECK-M33-NEXT: [[CMP3_EPIL:%.*]] = icmp ugt i32 [[TMP16]], 65535 +; 
CHECK-M33-NEXT: br i1 [[CMP3_EPIL]], label [[IF_THEN4_EPIL:%.*]], label [[IF_ELSE_EPIL:%.*]] +; CHECK-M33: if.else.epil: +; CHECK-M33-NEXT: [[AND_EPIL:%.*]] = and i32 [[ADD_EPIL]], [[TMP15]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL:%.*]] +; CHECK-M33: if.then4.epil: +; CHECK-M33-NEXT: [[SUB_EPIL:%.*]] = add i32 [[J_026_UNR]], -1 +; CHECK-M33-NEXT: [[ARRAYIDX6_EPIL:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB_EPIL]] +; CHECK-M33-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDX6_EPIL]], align 4 +; CHECK-M33-NEXT: [[CMP7_EPIL:%.*]] = icmp ugt i32 [[TMP15]], [[TMP17]] +; CHECK-M33-NEXT: [[COND_EPIL:%.*]] = zext i1 [[CMP7_EPIL]] to i32 +; CHECK-M33-NEXT: [[ADD8_EPIL:%.*]] = add i32 [[ADD_EPIL]], [[COND_EPIL]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL]] +; CHECK-M33: for.inc.epil: +; CHECK-M33-NEXT: [[TEMP_1_EPIL:%.*]] = phi i32 [ [[ADD8_EPIL]], [[IF_THEN4_EPIL]] ], [ [[AND_EPIL]], [[IF_ELSE_EPIL]] ] +; CHECK-M33-NEXT: [[INC_EPIL:%.*]] = add nuw i32 [[J_026_UNR]], 1 +; CHECK-M33-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i32 1, [[XTRAITER]] +; CHECK-M33-NEXT: br i1 [[EPIL_ITER_CMP]], label [[FOR_BODY_EPIL_1:%.*]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK-M33: for.body.epil.1: +; CHECK-M33-NEXT: [[ARRAYIDX_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]] +; CHECK-M33-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_1]], align 4 +; CHECK-M33-NEXT: [[ADD_EPIL_1:%.*]] = add i32 [[TMP18]], [[TEMP_1_EPIL]] +; CHECK-M33-NEXT: [[CMP1_EPIL_1:%.*]] = icmp ugt i32 [[ADD_EPIL_1]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_EPIL_1]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]], label [[IF_END_EPIL_1:%.*]] +; CHECK-M33: if.end.epil.1: +; CHECK-M33-NEXT: [[ARRAYIDX2_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL]] +; CHECK-M33-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_1]], align 4 +; CHECK-M33-NEXT: [[CMP3_EPIL_1:%.*]] = icmp ugt i32 [[TMP19]], 65535 +; CHECK-M33-NEXT: br i1 
[[CMP3_EPIL_1]], label [[IF_THEN4_EPIL_1:%.*]], label [[IF_ELSE_EPIL_1:%.*]] +; CHECK-M33: if.else.epil.1: +; CHECK-M33-NEXT: [[AND_EPIL_1:%.*]] = and i32 [[ADD_EPIL_1]], [[TMP18]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL_1:%.*]] +; CHECK-M33: if.then4.epil.1: +; CHECK-M33-NEXT: [[ARRAYIDX6_EPIL_1:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[J_026_UNR]] +; CHECK-M33-NEXT: [[TMP20:%.*]] = load i32, i32* [[ARRAYIDX6_EPIL_1]], align 4 +; CHECK-M33-NEXT: [[CMP7_EPIL_1:%.*]] = icmp ugt i32 [[TMP18]], [[TMP20]] +; CHECK-M33-NEXT: [[COND_EPIL_1:%.*]] = zext i1 [[CMP7_EPIL_1]] to i32 +; CHECK-M33-NEXT: [[ADD8_EPIL_1:%.*]] = add i32 [[ADD_EPIL_1]], [[COND_EPIL_1]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL_1]] +; CHECK-M33: for.inc.epil.1: +; CHECK-M33-NEXT: [[TEMP_1_EPIL_1:%.*]] = phi i32 [ [[ADD8_EPIL_1]], [[IF_THEN4_EPIL_1]] ], [ [[AND_EPIL_1]], [[IF_ELSE_EPIL_1]] ] +; CHECK-M33-NEXT: [[INC_EPIL_1:%.*]] = add nuw i32 [[INC_EPIL]], 1 +; CHECK-M33-NEXT: [[EPIL_ITER_CMP_1:%.*]] = icmp ne i32 2, [[XTRAITER]] +; CHECK-M33-NEXT: br i1 [[EPIL_ITER_CMP_1]], label [[FOR_BODY_EPIL_2:%.*]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK-M33: for.body.epil.2: +; CHECK-M33-NEXT: [[ARRAYIDX_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL_1]] +; CHECK-M33-NEXT: [[TMP21:%.*]] = load i32, i32* [[ARRAYIDX_EPIL_2]], align 4 +; CHECK-M33-NEXT: [[ADD_EPIL_2:%.*]] = add i32 [[TMP21]], [[TEMP_1_EPIL_1]] +; CHECK-M33-NEXT: [[CMP1_EPIL_2:%.*]] = icmp ugt i32 [[ADD_EPIL_2]], 16777215 +; CHECK-M33-NEXT: br i1 [[CMP1_EPIL_2]], label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]], label [[IF_END_EPIL_2:%.*]] +; CHECK-M33: if.end.epil.2: +; CHECK-M33-NEXT: [[ARRAYIDX2_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION]], i32 [[INC_EPIL_1]] +; CHECK-M33-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX2_EPIL_2]], align 4 +; CHECK-M33-NEXT: [[CMP3_EPIL_2:%.*]] = icmp ugt i32 [[TMP22]], 65535 +; CHECK-M33-NEXT: br i1 [[CMP3_EPIL_2]], label 
[[IF_THEN4_EPIL_2:%.*]], label [[IF_ELSE_EPIL_2:%.*]] +; CHECK-M33: if.else.epil.2: +; CHECK-M33-NEXT: [[AND_EPIL_2:%.*]] = and i32 [[ADD_EPIL_2]], [[TMP21]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL_2:%.*]] +; CHECK-M33: if.then4.epil.2: +; CHECK-M33-NEXT: [[ARRAYIDX6_EPIL_2:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[INC_EPIL]] +; CHECK-M33-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX6_EPIL_2]], align 4 +; CHECK-M33-NEXT: [[CMP7_EPIL_2:%.*]] = icmp ugt i32 [[TMP21]], [[TMP23]] +; CHECK-M33-NEXT: [[COND_EPIL_2:%.*]] = zext i1 [[CMP7_EPIL_2]] to i32 +; CHECK-M33-NEXT: [[ADD8_EPIL_2:%.*]] = add i32 [[ADD_EPIL_2]], [[COND_EPIL_2]] +; CHECK-M33-NEXT: br label [[FOR_INC_EPIL_2]] +; CHECK-M33: for.inc.epil.2: +; CHECK-M33-NEXT: [[TEMP_1_EPIL_2:%.*]] = phi i32 [ [[ADD8_EPIL_2]], [[IF_THEN4_EPIL_2]] ], [ [[AND_EPIL_2]], [[IF_ELSE_EPIL_2]] ] +; CHECK-M33-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] +; CHECK-M33: cleanup.loopexit.epilog-lcssa.loopexit: +; CHECK-M33-NEXT: [[TEMP_2_PH_PH1_PH:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_INC]] ], [ [[ADD_2]], [[FOR_INC_1]] ], [ [[ADD_3]], [[FOR_INC_2]] ] +; CHECK-M33-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA:%.*]] +; CHECK-M33: cleanup.loopexit.epilog-lcssa.loopexit2: +; CHECK-M33-NEXT: [[TEMP_2_PH_PH1_PH3:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ], [ [[TEMP_1_EPIL]], [[FOR_INC_EPIL]] ], [ [[ADD_EPIL_1]], [[FOR_BODY_EPIL_1]] ], [ [[TEMP_1_EPIL_1]], [[FOR_INC_EPIL_1]] ], [ [[ADD_EPIL_2]], [[FOR_BODY_EPIL_2]] ], [ [[TEMP_1_EPIL_2]], [[FOR_INC_EPIL_2]] ] +; CHECK-M33-NEXT: br label [[CLEANUP_LOOPEXIT_EPILOG_LCSSA]] +; CHECK-M33: cleanup.loopexit.epilog-lcssa: +; CHECK-M33-NEXT: [[TEMP_2_PH_PH1:%.*]] = phi i32 [ [[TEMP_2_PH_PH1_PH]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT]] ], [ [[TEMP_2_PH_PH1_PH3]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA_LOOPEXIT2]] ] +; CHECK-M33-NEXT: br label [[CLEANUP_LOOPEXIT]] +; CHECK-M33: cleanup.loopexit: +; CHECK-M33-NEXT: [[TEMP_2_PH:%.*]] = phi 
i32 [ [[TEMP_2_PH_PH]], [[CLEANUP_LOOPEXIT_UNR_LCSSA]] ], [ [[TEMP_2_PH_PH1]], [[CLEANUP_LOOPEXIT_EPILOG_LCSSA]] ] +; CHECK-M33-NEXT: br label [[CLEANUP]] +; CHECK-M33: cleanup: +; CHECK-M33-NEXT: [[TEMP_2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_2_PH]], [[CLEANUP_LOOPEXIT]] ] +; CHECK-M33-NEXT: store i32 [[TEMP_2]], i32* [[OUTPUT:%.*]], align 4 +; CHECK-M33-NEXT: ret void +; +; CHECK-M7-LABEL: @test_five_blocks( +; CHECK-M7-NEXT: entry: +; CHECK-M7-NEXT: [[CMP24:%.*]] = icmp ugt i32 [[MAXJ:%.*]], 1 +; CHECK-M7-NEXT: br i1 [[CMP24]], label [[FOR_BODY_PREHEADER:%.*]], label [[CLEANUP:%.*]] +; CHECK-M7: for.body.preheader: +; CHECK-M7-NEXT: br label [[FOR_BODY:%.*]] +; CHECK-M7: for.body: +; CHECK-M7-NEXT: [[J_026:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 1, [[FOR_BODY_PREHEADER]] ] +; CHECK-M7-NEXT: [[TEMP_025:%.*]] = phi i32 [ [[TEMP_1:%.*]], [[FOR_INC]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-M7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[INPUT:%.*]], i32 [[J_026]] +; CHECK-M7-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-M7-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TEMP_025]] +; CHECK-M7-NEXT: [[CMP1:%.*]] = icmp ugt i32 [[ADD]], 16777215 +; CHECK-M7-NEXT: br i1 [[CMP1]], label [[CLEANUP_LOOPEXIT:%.*]], label [[IF_END:%.*]] +; CHECK-M7: if.end: +; CHECK-M7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[CONDITION:%.*]], i32 [[J_026]] +; CHECK-M7-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 +; CHECK-M7-NEXT: [[CMP3:%.*]] = icmp ugt i32 [[TMP1]], 65535 +; CHECK-M7-NEXT: br i1 [[CMP3]], label [[IF_THEN4:%.*]], label [[IF_ELSE:%.*]] +; CHECK-M7: if.then4: +; CHECK-M7-NEXT: [[SUB:%.*]] = add i32 [[J_026]], -1 +; CHECK-M7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[INPUT]], i32 [[SUB]] +; CHECK-M7-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +; CHECK-M7-NEXT: [[CMP7:%.*]] = icmp ugt i32 [[TMP0]], [[TMP2]] +; CHECK-M7-NEXT: [[COND:%.*]] = zext i1 [[CMP7]] to 
i32 +; CHECK-M7-NEXT: [[ADD8:%.*]] = add i32 [[ADD]], [[COND]] +; CHECK-M7-NEXT: br label [[FOR_INC]] +; CHECK-M7: if.else: +; CHECK-M7-NEXT: [[AND:%.*]] = and i32 [[ADD]], [[TMP0]] +; CHECK-M7-NEXT: br label [[FOR_INC]] +; CHECK-M7: for.inc: +; CHECK-M7-NEXT: [[TEMP_1]] = phi i32 [ [[ADD8]], [[IF_THEN4]] ], [ [[AND]], [[IF_ELSE]] ] +; CHECK-M7-NEXT: [[INC]] = add nuw i32 [[J_026]], 1 +; CHECK-M7-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[MAXJ]] +; CHECK-M7-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[CLEANUP_LOOPEXIT]] +; CHECK-M7: cleanup.loopexit: +; CHECK-M7-NEXT: [[TEMP_2_PH:%.*]] = phi i32 [ [[TEMP_1]], [[FOR_INC]] ], [ [[ADD]], [[FOR_BODY]] ] +; CHECK-M7-NEXT: br label [[CLEANUP]] +; CHECK-M7: cleanup: +; CHECK-M7-NEXT: [[TEMP_2:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TEMP_2_PH]], [[CLEANUP_LOOPEXIT]] ] +; CHECK-M7-NEXT: store i32 [[TEMP_2]], i32* [[OUTPUT:%.*]], align 4 +; CHECK-M7-NEXT: ret void ; i32* nocapture readonly %Condition, i32* nocapture readonly %Input, Index: llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-at-most-two-exits.ll @@ -1,152 +1,129 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-other-exit-predictable=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s --check-prefix=ENABLED -; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-other-exit-predictable=false -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s --check-prefix=DISABLED +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -verify-loop-lcssa -verify-dom-info -verify-loop-info -S | FileCheck %s --check-prefix=CHECK define i32 
@test(i32* nocapture %a, i64 %n) { -; ENABLED-LABEL: @test( -; ENABLED-NEXT: entry: -; ENABLED-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 -; ENABLED-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 -; ENABLED-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 -; ENABLED-NEXT: br i1 [[TMP1]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] -; ENABLED: entry.new: -; ENABLED-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] -; ENABLED-NEXT: br label [[HEADER:%.*]] -; ENABLED: header: -; ENABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] -; ENABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ] -; ENABLED-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY_7]] ] -; ENABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_BODY:%.*]] -; ENABLED: for.body: -; ENABLED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; ENABLED-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; ENABLED-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; ENABLED-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1 -; ENABLED-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]] -; ENABLED: for.body.1: -; ENABLED-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] -; ENABLED-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 -; ENABLED-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; ENABLED-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1 -; ENABLED-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_2]], 
label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2:%.*]] -; ENABLED: for.body.2: -; ENABLED-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] -; ENABLED-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 -; ENABLED-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; ENABLED-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1 -; ENABLED-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]] -; ENABLED: for.body.3: -; ENABLED-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] -; ENABLED-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 -; ENABLED-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; ENABLED-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1 -; ENABLED-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4:%.*]] -; ENABLED: for.body.4: -; ENABLED-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] -; ENABLED-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 -; ENABLED-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; ENABLED-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1 -; ENABLED-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]] -; ENABLED: for.body.5: -; ENABLED-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] -; ENABLED-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 -; ENABLED-NEXT: [[ADD_5:%.*]] = 
add nsw i32 [[TMP7]], [[ADD_4]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; ENABLED-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1 -; ENABLED-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6:%.*]] -; ENABLED: for.body.6: -; ENABLED-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] -; ENABLED-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 -; ENABLED-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; ENABLED-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1 -; ENABLED-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]] -; ENABLED: for.body.7: -; ENABLED-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] -; ENABLED-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 -; ENABLED-NEXT: [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1 -; ENABLED-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1 -; ENABLED-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] -; ENABLED-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]] -; ENABLED: for.end.unr-lcssa.loopexit: -; ENABLED-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ] -; ENABLED-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY_7]] ] -; ENABLED-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ] -; ENABLED-NEXT: br label [[FOR_END_UNR_LCSSA]] -; ENABLED: for.end.unr-lcssa: -; ENABLED-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] 
-; ENABLED-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] -; ENABLED-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] -; ENABLED-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 -; ENABLED-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]] -; ENABLED: header.epil.preheader: -; ENABLED-NEXT: br label [[HEADER_EPIL:%.*]] -; ENABLED: header.epil: -; ENABLED-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ] -; ENABLED-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ] -; ENABLED-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ] -; ENABLED-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42 -; ENABLED-NEXT: br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]] -; ENABLED: for.body.epil: -; ENABLED-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]] -; ENABLED-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 -; ENABLED-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]] -; ENABLED-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1 -; ENABLED-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]] -; ENABLED-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 -; ENABLED-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] -; ENABLED-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[FOR_END_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] -; ENABLED: for.end.epilog-lcssa: -; ENABLED-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ] -; ENABLED-NEXT: br label [[FOR_END]] -; ENABLED: for.end: -; ENABLED-NEXT: 
[[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ] -; ENABLED-NEXT: ret i32 [[SUM_0_LCSSA]] -; ENABLED: for.exit2.loopexit: -; ENABLED-NEXT: [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ] -; ENABLED-NEXT: br label [[FOR_EXIT2:%.*]] -; ENABLED: for.exit2.loopexit2: -; ENABLED-NEXT: [[RETVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ] -; ENABLED-NEXT: br label [[FOR_EXIT2]] -; ENABLED: for.exit2: -; ENABLED-NEXT: [[RETVAL:%.*]] = phi i32 [ [[RETVAL_PH]], [[FOR_EXIT2_LOOPEXIT]] ], [ [[RETVAL_PH3]], [[FOR_EXIT2_LOOPEXIT2]] ] -; ENABLED-NEXT: ret i32 [[RETVAL]] -; -; DISABLED-LABEL: @test( -; DISABLED-NEXT: entry: -; DISABLED-NEXT: br label [[HEADER:%.*]] -; DISABLED: header: -; DISABLED-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; DISABLED-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY]] ] -; DISABLED-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42 -; DISABLED-NEXT: br i1 [[CMP]], label [[FOR_EXIT2:%.*]], label [[FOR_BODY]] -; DISABLED: for.body: -; DISABLED-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; DISABLED-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]] -; DISABLED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; DISABLED-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; DISABLED-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[HEADER]] -; DISABLED: for.end: -; DISABLED-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ] -; DISABLED-NEXT: ret i32 [[SUM_0_LCSSA]] -; DISABLED: for.exit2: -; DISABLED-NEXT: [[RETVAL:%.*]] 
= phi i32 [ [[SUM_02]], [[HEADER]] ] -; DISABLED-NEXT: ret i32 [[RETVAL]] +; CHECK-LABEL: @test( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; CHECK: entry.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[FOR_BODY_7]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]] +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: [[NITER_NEXT:%.*]] = add nuw nsw i64 [[NITER]], 1 +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_1:%.*]] +; CHECK: for.body.1: +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 +; CHECK-NEXT: [[NITER_NEXT_1:%.*]] = add nuw nsw i64 [[NITER_NEXT]], 1 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_2]], label 
[[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_2:%.*]] +; CHECK: for.body.2: +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 +; CHECK-NEXT: [[NITER_NEXT_2:%.*]] = add nuw nsw i64 [[NITER_NEXT_1]], 1 +; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_3:%.*]] +; CHECK: for.body.3: +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 +; CHECK-NEXT: [[NITER_NEXT_3:%.*]] = add nuw nsw i64 [[NITER_NEXT_2]], 1 +; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_4:%.*]] +; CHECK: for.body.4: +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 +; CHECK-NEXT: [[NITER_NEXT_4:%.*]] = add nuw nsw i64 [[NITER_NEXT_3]], 1 +; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_5:%.*]] +; CHECK: for.body.5: +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]] +; CHECK-NEXT: 
[[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 +; CHECK-NEXT: [[NITER_NEXT_5:%.*]] = add nuw nsw i64 [[NITER_NEXT_4]], 1 +; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_6:%.*]] +; CHECK: for.body.6: +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 +; CHECK-NEXT: [[NITER_NEXT_6:%.*]] = add nuw nsw i64 [[NITER_NEXT_5]], 1 +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]] +; CHECK: for.body.7: +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV_NEXT_6]], 1 +; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER_NEXT_6]], 1 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[FOR_END_UNR_LCSSA_LOOPEXIT:%.*]], label [[HEADER]] +; CHECK: for.end.unr-lcssa.loopexit: +; CHECK-NEXT: [[SUM_0_LCSSA_PH_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_7]], [[FOR_BODY_7]] ] +; CHECK-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_7]], [[FOR_BODY_7]] ] +; CHECK-NEXT: br label [[FOR_END_UNR_LCSSA]] +; CHECK: for.end.unr-lcssa: +; CHECK-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_PH_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], 
[[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[FOR_END_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_EPIL_PREHEADER:%.*]], label [[FOR_END:%.*]] +; CHECK: header.epil.preheader: +; CHECK-NEXT: br label [[HEADER_EPIL:%.*]] +; CHECK: header.epil: +; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ 0, [[HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_NEXT:%.*]], [[FOR_BODY_EPIL]] ] +; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_BODY_EPIL]] +; CHECK: for.body.epil: +; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1 +; CHECK-NEXT: [[EXITCOND_EPIL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_EPIL]], [[N]] +; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp ne i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label [[HEADER_EPIL]], label [[FOR_END_EPILOG_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: for.end.epilog-lcssa: +; CHECK-NEXT: [[SUM_0_LCSSA_PH1:%.*]] = phi i32 [ [[ADD_EPIL]], [[FOR_BODY_EPIL]] ] +; CHECK-NEXT: br label [[FOR_END]] +; CHECK: for.end: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ], [ [[SUM_0_LCSSA_PH1]], [[FOR_END_EPILOG_LCSSA]] ] +; CHECK-NEXT: ret 
i32 [[SUM_0_LCSSA]] +; CHECK: for.exit2.loopexit: +; CHECK-NEXT: [[RETVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[FOR_BODY]] ], [ [[ADD_1]], [[FOR_BODY_1]] ], [ [[ADD_2]], [[FOR_BODY_2]] ], [ [[ADD_3]], [[FOR_BODY_3]] ], [ [[ADD_4]], [[FOR_BODY_4]] ], [ [[ADD_5]], [[FOR_BODY_5]] ], [ [[ADD_6]], [[FOR_BODY_6]] ] +; CHECK-NEXT: br label [[FOR_EXIT2:%.*]] +; CHECK: for.exit2.loopexit2: +; CHECK-NEXT: [[RETVAL_PH3:%.*]] = phi i32 [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ] +; CHECK-NEXT: br label [[FOR_EXIT2]] +; CHECK: for.exit2: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[RETVAL_PH]], [[FOR_EXIT2_LOOPEXIT]] ], [ [[RETVAL_PH3]], [[FOR_EXIT2_LOOPEXIT2]] ] +; CHECK-NEXT: ret i32 [[RETVAL]] ; entry: br label %header Index: llvm/test/Transforms/LoopUnroll/runtime-loop.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -239,39 +239,6 @@ br i1 %cmp, label %header, label %LoopExit } -; multiple exit blocks. 
don't unroll -define void @multi_exit(i64 %trip, i1 %cond) { -; COMMON-LABEL: @multi_exit( -; COMMON-NOT: .unr - -entry: - br label %loop_header - -loop_header: - %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ] - br i1 %cond, label %loop_latch, label %loop_exiting_bb1 - -loop_exiting_bb1: - br i1 false, label %loop_exiting_bb2, label %exit1 - -loop_exiting_bb2: - br i1 false, label %loop_latch, label %exit3 - -exit3: - ret void - -loop_latch: - %iv_next = add i64 %iv, 1 - %cmp = icmp ne i64 %iv_next, %trip - br i1 %cmp, label %loop_header, label %exit2.loopexit - -exit1: - ret void - -exit2.loopexit: - ret void -} - !0 = distinct !{!0, !1} !1 = !{!"llvm.loop.unroll.runtime.disable"} Index: llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll +++ llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s -; RUN: opt < %s -unroll-runtime-other-exit-predictable=false -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL +; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-runtime=true -verify-dom-info -unroll-runtime-multi-exit=false -verify-loop-info -S | FileCheck %s -check-prefix=NOUNROLL ; this tests when unrolling multiple exit loop occurs by default (i.e. 
without specifying -unroll-runtime-multi-exit) @@ -195,25 +195,131 @@ ; ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[ENTRY_NEW:%.*]] +; CHECK: entry.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[N]], -8 ; CHECK-NEXT: br label [[HEADER:%.*]] ; CHECK: header: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[ADD:%.*]], [[LATCH]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[NITER_NEXT_7:%.*]], [[LATCH_7]] ] ; CHECK-NEXT: br label [[FOR_EXITING_BLOCK:%.*]] ; CHECK: for.exiting_block: -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N:%.*]], 42 -; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT:%.*]], label [[LATCH]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP]], label [[OTHEREXIT_LOOPEXIT:%.*]], label [[LATCH:%.*]] ; CHECK: latch: ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP0]], [[SUM_02]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LATCHEXIT:%.*]], label [[HEADER]] +; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], [[SUM_02]] +; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or i64 [[INDVARS_IV]], 1 +; CHECK-NEXT: br 
label [[FOR_EXITING_BLOCK_1:%.*]] +; CHECK: for.exiting_block.1: +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_1]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]] +; CHECK: latch.1: +; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP3]], [[ADD]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or i64 [[INDVARS_IV]], 2 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]] +; CHECK: for.exiting_block.2: +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_2]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]] +; CHECK: latch.2: +; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP4]], [[ADD_1]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or i64 [[INDVARS_IV]], 3 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]] +; CHECK: for.exiting_block.3: +; CHECK-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_3]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]] +; CHECK: latch.3: +; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP5]], [[ADD_2]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or i64 [[INDVARS_IV]], 4 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]] +; CHECK: for.exiting_block.4: +; CHECK-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_4]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]] +; CHECK: latch.4: +; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[ARRAYIDX_4]], align 4 +; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP6]], [[ADD_3]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or i64 [[INDVARS_IV]], 5 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]] +; CHECK: for.exiting_block.5: +; CHECK-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_5]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]] +; CHECK: latch.5: +; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP7]], [[ADD_4]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or i64 [[INDVARS_IV]], 6 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]] +; CHECK: for.exiting_block.6: +; CHECK-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_6]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]] +; CHECK: latch.6: +; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP8]], [[ADD_5]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or i64 [[INDVARS_IV]], 7 +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]] +; CHECK: for.exiting_block.7: +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_7]], label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]] +; CHECK: latch.7: +; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP9]], [[ADD_6]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]] +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label [[LATCHEXIT_UNR_LCSSA_LOOPEXIT:%.*]], label 
[[HEADER]] +; CHECK: latchexit.unr-lcssa.loopexit: +; CHECK-NEXT: br label [[LATCHEXIT_UNR_LCSSA]] +; CHECK: latchexit.unr-lcssa: +; CHECK-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], [[LATCHEXIT_UNR_LCSSA_LOOPEXIT]] ] +; CHECK-NEXT: [[LCMP_MOD_NOT:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD_NOT]], label [[LATCHEXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]] +; CHECK: header.epil.preheader: +; CHECK-NEXT: br label [[HEADER_EPIL:%.*]] +; CHECK: header.epil: +; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_NEXT:%.*]], [[LATCH_EPIL]] ], [ 0, [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_EPIL:%.*]] +; CHECK: for.exiting_block.epil: +; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT2:%.*]], label [[LATCH_EPIL]] +; CHECK: latch.epil: +; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_EPIL]] +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_EPIL]], align 4 +; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP10]], [[SUM_02_EPIL]] +; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1 +; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1 +; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]] +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP_NOT]], label [[LATCHEXIT_EPILOG_LCSSA:%.*]], label [[HEADER_EPIL]], !llvm.loop 
[[LOOP2:![0-9]+]] +; CHECK: latchexit.epilog-lcssa: +; CHECK-NEXT: br label [[LATCHEXIT]] ; CHECK: latchexit: -; CHECK-NEXT: ret i32 [[ADD]] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], [[LATCHEXIT_UNR_LCSSA]] ], [ [[ADD_EPIL]], [[LATCHEXIT_EPILOG_LCSSA]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK: otherexit.loopexit: +; CHECK-NEXT: [[RVAL_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1]], [[FOR_EXITING_BLOCK_2]] ], [ [[ADD_2]], [[FOR_EXITING_BLOCK_3]] ], [ [[ADD_3]], [[FOR_EXITING_BLOCK_4]] ], [ [[ADD_4]], [[FOR_EXITING_BLOCK_5]] ], [ [[ADD_5]], [[FOR_EXITING_BLOCK_6]] ], [ [[ADD_6]], [[FOR_EXITING_BLOCK_7]] ] +; CHECK-NEXT: br label [[OTHEREXIT:%.*]] +; CHECK: otherexit.loopexit2: +; CHECK-NEXT: br label [[OTHEREXIT]] ; CHECK: otherexit: -; CHECK-NEXT: ret i32 [[SUM_02]] +; CHECK-NEXT: [[RVAL:%.*]] = phi i32 [ [[RVAL_PH]], [[OTHEREXIT_LOOPEXIT]] ], [ [[SUM_02_EPIL]], [[OTHEREXIT_LOOPEXIT2]] ] +; CHECK-NEXT: ret i32 [[RVAL]] ; ; NOUNROLL-LABEL: @test2( ; NOUNROLL-NEXT: entry: