Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -36,6 +36,7 @@ #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/UnrollLoop.h" #include @@ -45,6 +46,10 @@ STATISTIC(NumRuntimeUnrolled, "Number of loops unrolled with run-time trip counts"); +static cl::opt UnrollRuntimeMultiExit( + "unroll-runtime-multi-exit", cl::init(false), cl::Hidden, + cl::desc("Allow runtime unrolling for loops with multiple exits, when " + "epilog is generated")); /// Connect the unrolling prolog code to the original loop. /// The unrolling prolog code contains code to execute the @@ -285,15 +290,13 @@ /// The cloned blocks should be inserted between InsertTop and InsertBot. /// If loop structure is cloned InsertTop should be new preheader, InsertBot /// new loop exit. -/// -static void CloneLoopBlocks(Loop *L, Value *NewIter, - const bool CreateRemainderLoop, - const bool UseEpilogRemainder, - BasicBlock *InsertTop, BasicBlock *InsertBot, - BasicBlock *Preheader, - std::vector &NewBlocks, - LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap, - DominatorTree *DT, LoopInfo *LI) { +/// Return the new cloned loop that is created when CreateRemainderLoop is true. +static Loop * +CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, + const bool UseEpilogRemainder, BasicBlock *InsertTop, + BasicBlock *InsertBot, BasicBlock *Preheader, + std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, + ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); @@ -418,7 +421,10 @@ // Set operand 0 to refer to the loop id itself. NewLoopID->replaceOperandWith(0, NewLoopID); NewLoop->setLoopID(NewLoopID); + return NewLoop; } + else + return nullptr; } /// Insert code in the prolog/epilog code when unrolling a loop with a @@ -465,29 +471,52 @@ LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, bool PreserveLCSSA) { // for now, only unroll loops that contain a single exit - if (!L->getExitingBlock()) + if (!UnrollRuntimeMultiExit && !L->getExitingBlock()) return false; - // Make sure the loop is in canonical form, and there is a single - // exit block only. + // Make sure the loop is in canonical form. if (!L->isLoopSimplifyForm()) return false; // Guaranteed by LoopSimplifyForm. BasicBlock *Latch = L->getLoopLatch(); + BasicBlock *Header = L->getHeader(); BasicBlock *LatchExit = L->getUniqueExitBlock(); // successor out of loop - if (!LatchExit) + if (!LatchExit && !UnrollRuntimeMultiExit) return false; + // These are exit blocks other than the target of the latch exiting block. + SmallVector OtherExits; + BranchInst *LatchBR = cast(Latch->getTerminator()); + unsigned int ExitIndex = LatchBR->getSuccessor(0) == Header ? 1 : 0; // Cloning the loop basic blocks (`CloneLoopBlocks`) requires that one of the - // targets of the Latch be the single exit block out of the loop. This needs + // targets of the Latch be an exit block out of the loop. This needs // to be guaranteed by the callers of UnrollRuntimeLoopRemainder. - BranchInst *LatchBR = cast(Latch->getTerminator()); - assert((LatchBR->getSuccessor(0) == LatchExit || - LatchBR->getSuccessor(1) == LatchExit) && - "one of the loop latch successors should be " - "the exit block!"); - (void)LatchBR; + assert(!L->contains(LatchBR->getSuccessor(ExitIndex)) && + "one of the loop latch successors should be the exit block!"); + // Support runtime unrolling for multiple exit blocks and multiple exiting + // blocks. + if (!LatchExit) { + assert(UseEpilogRemainder && "Multi exit unrolling is currently supported " + "unrolling with epilog remainder only!"); + LatchExit = LatchBR->getSuccessor(ExitIndex); + // We rely on LCSSA form being preserved when the exit blocks are + // transformed. + if (!PreserveLCSSA) + return false; + // TODO: Support multiple exiting blocks jumping to the `LatchExit`. This + // will need updating the logic in connectEpilog. + if (!LatchExit->getSinglePredecessor()) + return false; + SmallVector Exits; + L->getUniqueExitBlocks(Exits); + for (auto *BB : Exits) + if (BB != LatchExit) + OtherExits.push_back(BB); + } + + assert(LatchExit && "Latch Exit should exist!"); + // Use Scalar Evolution to compute the trip count. This allows more loops to // be unrolled than relying on induction var simplification. if (!SE) @@ -512,7 +541,6 @@ if (isa(TripCountSC)) return false; - BasicBlock *Header = L->getHeader(); BasicBlock *PreHeader = L->getLoopPreheader(); BranchInst *PreHeaderBR = cast(PreHeader->getTerminator()); const DataLayout &DL = Header->getModule()->getDataLayout(); @@ -654,8 +682,9 @@ // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; - CloneLoopBlocks(L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, - InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); + Loop *remainderLoop = CloneLoopBlocks( + L, ModVal, CreateRemainderLoop, UseEpilogRemainder, InsertTop, InsertBot, + NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Insert the cloned blocks into the function. F->getBasicBlockList().splice(InsertBot->getIterator(), @@ -663,6 +692,42 @@ NewBlocks[0]->getIterator(), F->end()); + // Now the loop blocks are cloned and the other exiting blocks from the + // remainder are connected to the original Loop's exit blocks. The remaining + // work is to update the phi nodes in the original loop, and take in the + // values from the cloned region. Also update the dominator info for + // OtherExits, since we have new edges into OtherExits. + for (auto *BB : OtherExits) { + for (auto &II : *BB) { + + // Given we preserve LCSSA form, we know that the values used outside the + // loop will be used through these phi nodes at the exit blocks that are + // transformed below. + if (!isa(II)) + break; + PHINode *Phi = cast(&II); + unsigned oldNumOperands = Phi->getNumIncomingValues(); + // Add the incoming values from the remainder code to the end of the phi + // node. + for (unsigned i =0; i < oldNumOperands; i++){ + Value *newVal = VMap[Phi->getIncomingValue(i)]; + if (!newVal) { + assert(isa(Phi->getIncomingValue(i)) && + "VMap should exist for all values except constants!"); + newVal = Phi->getIncomingValue(i); + } + Phi->addIncoming(newVal, + cast(VMap[Phi->getIncomingBlock(i)])); + } + } + // Update the dominator info because the immediate dominator is no longer the + // header of the original Loop. BB has edges both from L and remainder code. + // Since the preheader determines which loop is run (L or directly jump to + // the remainder code), we set the immediate dominator as the preheader. + if (DT) + DT->changeImmediateDominator(BB, PreHeader); + } + // Loop structure should be the following: // Epilog Prolog // @@ -725,6 +790,19 @@ if (Loop *ParentLoop = L->getParentLoop()) SE->forgetLoop(ParentLoop); + // Canonicalize to LoopSimplifyForm both original and remainder loops. We + // cannot rely on the LoopUnrollPass to do this because it only does + // canonicalization for parent/subloops and not the sibling loops. + if (OtherExits.size() > 0) { + // Generate dedicated exit blocks for the original loop, to preserve + // LoopSimplifyForm. + formDedicatedExitBlocks(L, DT, LI, PreserveLCSSA); + // Generate dedicated exit blocks for the remainder loop if one exists, to + // preserve LoopSimplifyForm. + if (remainderLoop) + formDedicatedExitBlocks(remainderLoop, DT, LI, PreserveLCSSA); + } + NumRuntimeUnrolled++; return true; } Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- /dev/null +++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -0,0 +1,467 @@ +; RUN: opt < %s -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine -S | FileCheck %s +; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=true -unroll-runtime-multi-exit=true -verify-dom-info -verify-loop-info -instcombine + +; the second RUN generates an epilog remainder block for all the test +; cases below (it does not generate a loop). + +; The check statements are autogenerated and manually reduced to important statements being tested. + +; test with three exiting and three exit blocks. +; none of the exit blocks have successors +define void @test1(i64 %trip, i1 %cond) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[TRIP:%.*]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TRIP]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label %exit2.loopexit.unr-lcssa, label [[ENTRY_NEW:%.*]] +; CHECK: entry.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TRIP]], [[XTRAITER]] +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop_header: +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LOOP_LATCH_7:%.*]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[LOOP_LATCH:%.*]], label [[LOOP_EXITING_BB1:%.*]] +; CHECK: loop_exiting_bb1: +; CHECK-NEXT: br i1 false, label [[LOOP_EXITING_BB2:%.*]], label [[EXIT1_LOOPEXIT:%.*]] +; CHECK: loop_exiting_bb2: +; CHECK-NEXT: br i1 false, label [[LOOP_LATCH]], label [[EXIT3_LOOPEXIT:%.*]] +; CHECK: exit3.loopexit: +; CHECK-NEXT: br label [[EXIT3:%.*]] +; CHECK: exit3.loopexit2: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3: +; CHECK-NEXT: ret void +; CHECK: exit2.loopexit.unr-lcssa.loopexit: +; CHECK-NEXT: br label %exit2.loopexit.unr-lcssa +; CHECK: exit2.loopexit.unr-lcssa: +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[EXIT2_LOOPEXIT:%.*]], label [[LOOP_HEADER_EPIL_PREHEADER:%.*]] +; CHECK: loop_header.epil.preheader: +; CHECK-NEXT: br label [[LOOP_HEADER_EPIL:%.*]] +; CHECK: loop_header.epil: +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[LOOP_LATCH_EPIL:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP_LATCH_EPIL]], label [[LOOP_EXITING_BB1_EPIL:%.*]] +; CHECK: loop_latch.7: +; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %exit2.loopexit.unr-lcssa.loopexit, label [[LOOP_HEADER]] +; +entry: + br label %loop_header + +loop_header: + %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ] + br i1 %cond, label %loop_latch, label %loop_exiting_bb1 + +loop_exiting_bb1: + br i1 false, label %loop_exiting_bb2, label %exit1 + +loop_exiting_bb2: + br i1 false, label %loop_latch, label %exit3 + +exit3: + ret void + +loop_latch: + %iv_next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv_next, %trip + br i1 %cmp, label %loop_header, label %exit2.loopexit + +exit1: + ret void + +exit2.loopexit: + ret void +} + + +; test with three exiting and two exit blocks. +; The non-latch exit block has 2 unique predecessors. +; There are 2 values passed to the exit blocks that are calculated at every iteration. +; %sum.02 and %add. Both of these are incoming values for phi from every exiting +; unrolled block. +define i32 @test2(i32* nocapture %a, i64 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 7 +; CHECK-NEXT: br i1 [[TMP1]], label %for.end.unr-lcssa, label [[ENTRY_NEW:%.*]] +; CHECK: entry.new: +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]] +; CHECK-NEXT: br label [[HEADER:%.*]] +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[FOR_BODY_7]] ] +; CHECK-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; CHECK: for.exiting_block: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY:%.*]] +; CHECK: for.end.unr-lcssa: +; CHECK-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], %for.end.unr-lcssa.loopexit ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], %for.end.unr-lcssa.loopexit ] +; CHECK-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], %for.end.unr-lcssa.loopexit ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_END:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]] +; CHECK: header.epil.preheader: +; CHECK-NEXT: br label [[HEADER_EPIL:%.*]] +; CHECK: header.epil: +; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[FOR_BODY_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[FOR_BODY_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[FOR_BODY_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_EXITING_BLOCK_EPIL:%.*]] +; CHECK: for.exiting_block.epil: +; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2]], label [[FOR_BODY_EPIL]] +; CHECK: for.exiting_block.7: +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_BODY_7]] +; CHECK: for.body.7: +; CHECK: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %for.end.unr-lcssa.loopexit, label [[HEADER]] +; +entry: + br label %header + +header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + br i1 false, label %for.exit2, label %for.exiting_block + +for.exiting_block: + %cmp = icmp eq i64 %n, 42 + br i1 %cmp, label %for.exit2, label %for.body + +for.body: + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.end, label %header + +for.end: ; preds = %for.body + %sum.0.lcssa = phi i32 [ %add, %for.body ] + ret i32 %sum.0.lcssa + +for.exit2: + %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ] + ret i32 %retval +} + +; test with two exiting and three exit blocks. +; the non-latch exiting block has a switch. +define void @test3(i64 %trip, i64 %add) { +; CHECK-LABEL: @test3( +; CHECK: loop_header: +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[LOOP_LATCH_7:%.*]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LOOP_LATCH_7]] ] +; CHECK-NEXT: br i1 undef, label [[LOOP_LATCH:%.*]], label [[LOOP_EXITING_BB1:%.*]] +; CHECK: loop_exiting_bb1: +; CHECK-NEXT: switch i64 [[SUM]], label [[LOOP_LATCH]] [ +; CHECK-NEXT: i64 24, label [[EXIT1_LOOPEXIT:%.*]] +; CHECK-NEXT: i64 42, label [[EXIT3_LOOPEXIT:%.*]] +; CHECK-NEXT: ] +; CHECK: exit3.loopexit: +; CHECK-NEXT: br label [[EXIT3:%.*]] +; CHECK: exit3.loopexit2: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3: +; CHECK-NEXT: ret void +; CHECK: loop_latch: +; CHECK-NEXT: [[SUM_NEXT:%.*]] = add i64 [[SUM]], [[ADD:%.*]] +; CHECK-NEXT: br i1 undef, label [[LOOP_LATCH_1:%.*]], label [[LOOP_EXITING_BB1_1:%.*]] +; CHECK: exit1.loopexit: +; CHECK-NEXT: br label [[EXIT1:%.*]] +; CHECK: exit1.loopexit1: +; CHECK-NEXT: br label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: ret void +; CHECK: exit2.loopexit.unr-lcssa.loopexit: +; CHECK-NEXT: br label %exit2.loopexit.unr-lcssa +; CHECK: exit2.loopexit.unr-lcssa: +; CHECK-NEXT: [[SUM_UNR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[SUM_NEXT_7]], %exit2.loopexit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[EXIT2_LOOPEXIT:%.*]], label [[LOOP_HEADER_EPIL_PREHEADER:%.*]] +; CHECK: loop_header.epil.preheader: +; CHECK-NEXT: br label [[LOOP_HEADER_EPIL:%.*]] +; CHECK: loop_header.epil: +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i64 [ [[SUM_UNR]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[SUM_NEXT_EPIL:%.*]], [[LOOP_LATCH_EPIL:%.*]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[LOOP_LATCH_EPIL]] ] +; CHECK-NEXT: br i1 undef, label [[LOOP_LATCH_EPIL]], label [[LOOP_EXITING_BB1_EPIL:%.*]] +; CHECK: loop_exiting_bb1.epil: +; CHECK-NEXT: switch i64 [[SUM_EPIL]], label [[LOOP_LATCH_EPIL]] [ +; CHECK-NEXT: i64 24, label [[EXIT1_LOOPEXIT1:%.*]] +; CHECK-NEXT: i64 42, label [[EXIT3_LOOPEXIT2:%.*]] +; CHECK-NEXT: ] +; CHECK: loop_latch.epil: +; CHECK-NEXT: [[SUM_NEXT_EPIL]] = add i64 [[SUM_EPIL]], [[ADD]] +; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %exit2.loopexit.epilog-lcssa, label [[LOOP_HEADER_EPIL]], !llvm.loop !3 +; CHECK: exit2.loopexit.epilog-lcssa: +; CHECK-NEXT: br label [[EXIT2_LOOPEXIT]] +; CHECK: exit2.loopexit: +; CHECK-NEXT: ret void +; CHECK: loop_exiting_bb1.1: +; CHECK-NEXT: switch i64 [[SUM_NEXT]], label [[LOOP_LATCH_1]] [ +; CHECK-NEXT: i64 24, label [[EXIT1_LOOPEXIT]] +; CHECK-NEXT: i64 42, label [[EXIT3_LOOPEXIT]] +; CHECK-NEXT: ] +; CHECK: loop_latch.7: +; CHECK: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %exit2.loopexit.unr-lcssa.loopexit, label [[LOOP_HEADER]] +; +entry: + br label %loop_header + +loop_header: + %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ] + %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ] + br i1 undef, label %loop_latch, label %loop_exiting_bb1 + +loop_exiting_bb1: + switch i64 %sum, label %loop_latch [ + i64 24, label %exit1 + i64 42, label %exit3 + ] + +exit3: + ret void + +loop_latch: + %iv_next = add nuw nsw i64 %iv, 1 + %sum.next = add i64 %sum, %add + %cmp = icmp ne i64 %iv_next, %trip + br i1 %cmp, label %loop_header, label %exit2.loopexit + +exit1: + ret void + +exit2.loopexit: + ret void +} + +; FIXME: Support multiple exiting blocks to the same latch exit block. +define i32 @test4(i32* nocapture %a, i64 %n, i1 %cond) { +; CHECK-LABEL: @test4( +; CHECK-NOT: .unr +; CHECK-NOT: .epil +entry: + br label %header + +header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + br i1 %cond, label %for.end, label %for.exiting_block + +for.exiting_block: + %cmp = icmp eq i64 %n, 42 + br i1 %cmp, label %for.exit2, label %for.body + +for.body: ; preds = %for.body, %entry + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.end, label %header + +for.end: ; preds = %for.body, %entry + %sum.0.lcssa = phi i32 [ 0, %header ], [ %add, %for.body ] + ret i32 %sum.0.lcssa + +for.exit2: + ret i32 42 +} + +; two exiting and two exit blocks. +; the non-latch exiting block has duplicate edges to the non-latch exit block. +define i64 @test5(i64 %trip, i64 %add, i1 %cond) { +; CHECK-LABEL: @test5( +; CHECK: loop_header: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[IV_NEXT_7:%.*]], [[LOOP_LATCH_7:%.*]] ] +; CHECK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[SUM_NEXT_7:%.*]], [[LOOP_LATCH_7]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LOOP_LATCH_7]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[LOOP_LATCH:%.*]], label [[LOOP_EXITING:%.*]] +; CHECK: loop_exiting: +; CHECK-NEXT: [[IVY:%.*]] = add i64 [[IV]], [[ADD:%.*]] +; CHECK-NEXT: switch i64 [[SUM]], label [[LOOP_LATCH]] [ +; CHECK-NEXT: i64 24, label [[EXIT1_LOOPEXIT:%.*]] +; CHECK-NEXT: i64 42, label [[EXIT1_LOOPEXIT]] +; CHECK-NEXT: ] +; CHECK: loop_latch: +; CHECK-NEXT: [[SUM_NEXT:%.*]] = add i64 [[SUM]], [[ADD]] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP_LATCH_1:%.*]], label [[LOOP_EXITING_1:%.*]] +; CHECK: exit1.loopexit: +; CHECK-NEXT: [[RESULT_PH:%.*]] = phi i64 [ [[IVY]], [[LOOP_EXITING]] ], [ [[IVY]], [[LOOP_EXITING]] ], [ [[IVY_1:%.*]], [[LOOP_EXITING_1]] ], [ [[IVY_1]], [[LOOP_EXITING_1]] ], [ [[IVY_2:%.*]], [[LOOP_EXITING_2:%.*]] ], [ [[IVY_2]], [[LOOP_EXITING_2]] ], +; CHECK-NEXT: br label [[EXIT1:%.*]] +; CHECK: exit1.loopexit2: +; CHECK-NEXT: [[IVY_EPIL:%.*]] = add i64 [[IV_EPIL:%.*]], [[ADD]] +; CHECK-NEXT: br label [[EXIT1]] +; CHECK: exit1: +; CHECK-NEXT: [[RESULT:%.*]] = phi i64 [ [[RESULT_PH]], [[EXIT1_LOOPEXIT]] ], [ [[IVY_EPIL]], [[EXIT1_LOOPEXIT2:%.*]] ] +; CHECK-NEXT: ret i64 [[RESULT]] +; CHECK: latchexit.unr-lcssa.loopexit: +; CHECK-NEXT: br label %latchexit.unr-lcssa +; CHECK: latchexit.unr-lcssa: +; CHECK-NEXT: [[SUM_NEXT_LCSSA_PH:%.*]] = phi i64 [ undef, [[ENTRY:%.*]] ], [ [[SUM_NEXT_7]], %latchexit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_NEXT_7]], %latchexit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[SUM_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT_7]], %latchexit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCHEXIT:%.*]], label [[LOOP_HEADER_EPIL_PREHEADER:%.*]] +; CHECK: loop_header.epil.preheader: +; CHECK-NEXT: br label [[LOOP_HEADER_EPIL:%.*]] +; CHECK: loop_header.epil: +; CHECK-NEXT: [[IV_EPIL]] = phi i64 [ [[IV_UNR]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[IV_NEXT_EPIL:%.*]], [[LOOP_LATCH_EPIL:%.*]] ] +; CHECK-NEXT: [[SUM_EPIL:%.*]] = phi i64 [ [[SUM_UNR]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[SUM_NEXT_EPIL:%.*]], [[LOOP_LATCH_EPIL]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[LOOP_HEADER_EPIL_PREHEADER]] ], [ [[EPIL_ITER_SUB:%.*]], [[LOOP_LATCH_EPIL]] ] +; CHECK-NEXT: br i1 [[COND]], label [[LOOP_LATCH_EPIL]], label [[LOOP_EXITING_EPIL:%.*]] +; CHECK: loop_exiting.epil: +; CHECK-NEXT: switch i64 [[SUM_EPIL]], label [[LOOP_LATCH_EPIL]] [ +; CHECK-NEXT: i64 24, label [[EXIT1_LOOPEXIT2]] +; CHECK-NEXT: i64 42, label [[EXIT1_LOOPEXIT2]] +; CHECK-NEXT: ] +; CHECK: loop_latch.epil: +; CHECK-NEXT: [[IV_NEXT_EPIL]] = add nuw nsw i64 [[IV_EPIL]], 1 +; CHECK-NEXT: [[SUM_NEXT_EPIL]] = add i64 [[SUM_EPIL]], [[ADD]] +; CHECK-NEXT: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %latchexit.epilog-lcssa, label [[LOOP_HEADER_EPIL]], !llvm.loop !4 +; CHECK: latchexit.epilog-lcssa: +; CHECK-NEXT: br label [[LATCHEXIT]] +; CHECK: latchexit: +; CHECK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT_LCSSA_PH]], %latchexit.unr-lcssa ], [ [[SUM_NEXT_EPIL]], %latchexit.epilog-lcssa ] +; CHECK-NEXT: ret i64 [[SUM_NEXT_LCSSA]] +; +entry: + br label %loop_header + +loop_header: + %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ] + %sum = phi i64 [ 0, %entry ], [ %sum.next, %loop_latch ] + br i1 %cond, label %loop_latch, label %loop_exiting + +loop_exiting: + %ivy = add i64 %iv, %add + switch i64 %sum, label %loop_latch [ + i64 24, label %exit1 + i64 42, label %exit1 + ] + +loop_latch: + %iv_next = add nuw nsw i64 %iv, 1 + %sum.next = add i64 %sum, %add + %cmp = icmp ne i64 %iv_next, %trip + br i1 %cmp, label %loop_header, label %latchexit + +exit1: + %result = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ] + ret i64 %result + +latchexit: + ret i64 %sum.next +} + +; test when exit blocks have successors. +define i32 @test6(i32* nocapture %a, i64 %n, i1 %cond, i32 %x) { +; CHECK-LABEL: @test6( +; CHECK: header: +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ 0, [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ [[UNROLL_ITER]], [[ENTRY_NEW]] ], [ [[NITER_NSUB_7:%.*]], [[LATCH_7]] ] +; CHECK-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; CHECK: for.exiting_block: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH:%.*]] +; CHECK: latch: +; CHECK: br i1 false, label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_1:%.*]] +; CHECK: latch_exit.unr-lcssa.loopexit: +; CHECK-NEXT: br label %latch_exit.unr-lcssa +; CHECK: latch_exit.unr-lcssa: +; CHECK-NEXT: [[SUM_0_LCSSA_PH:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[ADD_7]], %latch_exit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT_7]], %latch_exit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_7]], %latch_exit.unr-lcssa.loopexit ] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[LATCH_EXIT:%.*]], label [[HEADER_EPIL_PREHEADER:%.*]] +; CHECK: header.epil: +; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_EPIL:%.*]], [[LATCH_EPIL:%.*]] ], [ [[INDVARS_IV_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[SUM_02_EPIL:%.*]] = phi i32 [ [[ADD_EPIL:%.*]], [[LATCH_EPIL]] ], [ [[SUM_02_UNR]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: [[EPIL_ITER:%.*]] = phi i64 [ [[EPIL_ITER_SUB:%.*]], [[LATCH_EPIL]] ], [ [[XTRAITER]], [[HEADER_EPIL_PREHEADER]] ] +; CHECK-NEXT: br i1 false, label [[FOR_EXIT2_LOOPEXIT2:%.*]], label [[FOR_EXITING_BLOCK_EPIL:%.*]] +; CHECK: for.exiting_block.epil: +; CHECK-NEXT: [[CMP_EPIL:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[FOR_EXIT2_LOOPEXIT2]], label [[LATCH_EPIL]] +; CHECK: latch.epil: +; CHECK: [[EPIL_ITER_SUB]] = add i64 [[EPIL_ITER]], -1 +; CHECK-NEXT: [[EPIL_ITER_CMP:%.*]] = icmp eq i64 [[EPIL_ITER_SUB]], 0 +; CHECK-NEXT: br i1 [[EPIL_ITER_CMP]], label %latch_exit.epilog-lcssa, label [[HEADER_EPIL]], !llvm.loop !5 +; CHECK: latch_exit.epilog-lcssa: +; CHECK-NEXT: br label [[LATCH_EXIT]] +; CHECK: latch_exit: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_PH]], %latch_exit.unr-lcssa ], [ [[ADD_EPIL]], %latch_exit.epilog-lcssa ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; CHECK: for.exit2.loopexit: +; CHECK-NEXT: [[RETVAL_PH:%.*]] = phi i32 [ 42, [[FOR_EXITING_BLOCK]] ], [ [[SUM_02]], [[HEADER]] ], [ [[ADD]], [[LATCH]] ], [ 42, [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], [[LATCH_1:%.*]] ], [ 42, [[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[LATCH_2:%.*]] ], +; CHECK-NEXT: br label [[FOR_EXIT2:%.*]] +; CHECK: for.exit2.loopexit2: +; CHECK-NEXT: [[RETVAL_PH3:%.*]] = phi i32 [ 42, [[FOR_EXITING_BLOCK_EPIL]] ], [ [[SUM_02_EPIL]], [[HEADER_EPIL]] ] +; CHECK-NEXT: br label [[FOR_EXIT2]] +; CHECK: for.exit2: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i32 [ [[RETVAL_PH]], [[FOR_EXIT2_LOOPEXIT]] ], [ [[RETVAL_PH3]], [[FOR_EXIT2_LOOPEXIT2]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[EXIT_TRUE:%.*]], label [[EXIT_FALSE:%.*]] +; CHECK: exit_true: +; CHECK-NEXT: ret i32 [[RETVAL]] +; CHECK: exit_false: +; CHECK-NEXT: [[ADDX:%.*]] = add i32 [[RETVAL]], [[X:%.*]] +; CHECK-NEXT: ret i32 [[ADDX]] +; CHECK: for.exiting_block.7: +; CHECK-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; CHECK-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_7]] +; CHECK: latch.7: +; CHECK: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; CHECK-NEXT: [[NITER_NSUB_7]] = add i64 [[NITER]], -8 +; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NSUB_7]], 0 +; CHECK-NEXT: br i1 [[NITER_NCMP_7]], label %latch_exit.unr-lcssa.loopexit, label [[HEADER]] +; +entry: + br label %header + +header: + %indvars.iv = phi i64 [ %indvars.iv.next, %latch ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %latch ], [ 0, %entry ] + br i1 false, label %for.exit2, label %for.exiting_block + +for.exiting_block: + %cmp = icmp eq i64 %n, 42 + br i1 %cmp, label %for.exit2, label %latch + +latch: + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %load = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %load, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %latch_exit, label %header + +latch_exit: + %sum.0.lcssa = phi i32 [ %add, %latch ] + ret i32 %sum.0.lcssa + +for.exit2: + %retval = phi i32 [ %sum.02, %header ], [ 42, %for.exiting_block ] + %addx = add i32 %retval, %x + br i1 %cond, label %exit_true, label %exit_false + +exit_true: + ret i32 %retval + +exit_false: + ret i32 %addx +} + +