Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -85,6 +85,14 @@ assert(Latch && "Loop must have a latch"); BasicBlock *PrologLatch = cast(VMap[Latch]); + BasicBlock *Header = L->getHeader(); + auto *PrologHeader = cast(VMap[Header]); + + bool isHeaderPredOfLatchExit = false; + for (auto *Pred : predecessors(OriginalLoopLatchExit)) + if (Pred == Header && Header != Latch) + isHeaderPredOfLatchExit = true; + SmallVector PrologExitPreds(predecessors(PrologExit)); // Create a PHI node for each outgoing value from the original loop // (which means it is an outgoing value from the prolog code too). // The new PHI node is inserted in the prolog end basic block. @@ -94,33 +102,75 @@ for (PHINode &PN : Succ->phis()) { // Add a new PHI node to the prolog end block and add the // appropriate incoming values. - // TODO: This code assumes that the PrologExit (or the LatchExit block for - // prolog loop) contains only one predecessor from the loop, i.e. the - // PrologLatch. When supporting multiple-exiting block loops, we can have - // two or more blocks that have the LatchExit as the target in the - // original loop. - PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr", + PHINode *NewPN = PHINode::Create(PN.getType(), PrologExitPreds.size(), PN.getName() + ".unr", PrologExit->getFirstNonPHI()); - // Adding a value to the new PHI node from the original loop preheader. - // This is the value that skips all the prolog code. + // The PrologExit predecessors definitely include the PreHeader and PrologLatch. + // With multi-exit unrolling, we can have other predecessors to the + // PrologExit, such as the PrologHeader or other blocks in the + // PrologLoop or remainder code. We need to add the incoming values from + // all relevant predecessors. + + // Succ is loop header. 
if (L->contains(&PN)) { - // Succ is loop header. + // Adding a value to the new PHI node from the original loop preheader. + // This is the value that skips all the prolog code. NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader); + Value *LatchV = PN.getIncomingValueForBlock(Latch); + BasicBlock *LatchVBB = nullptr; + if (Instruction *I = dyn_cast(LatchV)) { + if (L->contains(I)) { + LatchV = VMap.lookup(I); + if (isa(LatchV)) + LatchVBB = cast(LatchV)->getParent(); + } + } + // Adding a value to the new PHI node from the last prolog block + // that was created. + NewPN->addIncoming(LatchV, PrologLatch); + Value *HeaderV = VMap[&PN]; + // When header is pred of LatchExit (i.e. the PrologHeader is also a + // pred of PrologExit), we need to add the corresponding cloned value of + // PN as the incoming value for NewPN. + if (isHeaderPredOfLatchExit) + NewPN->addIncoming(HeaderV, PrologHeader); + // Update NewPN with values for the remaining loop exiting blocks that go to the PrologExit. + for (auto *Pred : PrologExitPreds) { + if (Pred == PrologHeader || Pred == PrologLatch || Pred == PreHeader) + continue; + Value *PredIncomingValue = nullptr; + assert(DT && "We should have a DT at this point!"); + // The incoming value should either be LatchV or + // HeaderV. If LatchVBB dominates the Pred, we should use that value + // as the incoming value. + if (LatchVBB && PrologHeader != LatchVBB && + DT->dominates(LatchVBB, Pred)) + PredIncomingValue = LatchV; + else + PredIncomingValue = HeaderV; + assert(PredIncomingValue && "PredIncomingValue is null!"); + NewPN->addIncoming(PredIncomingValue, Pred); + } } else { // Succ is LatchExit. NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader); - } - - Value *V = PN.getIncomingValueForBlock(Latch); - if (Instruction *I = dyn_cast(V)) { - if (L->contains(I)) { - V = VMap.lookup(I); + // Update NewPN incoming values based on PN's incoming values. 
+ // This is needed when the LatchExit has predecessors other than the + // Latch. + for (unsigned i = 0; i < PN.getNumIncomingValues(); i++) { + Value *V = PN.getIncomingValue(i); + BasicBlock *BBIncoming = PN.getIncomingBlock(i); + if (LI->getLoopFor(BBIncoming) != L) + continue; + if (auto *I = dyn_cast(V)) { + if (L->contains(I)) + V = VMap.lookup(I); + } + NewPN->addIncoming(V, cast(VMap[BBIncoming])); } } - // Adding a value to the new PHI node from the last prolog block - // that was created. - NewPN->addIncoming(V, PrologLatch); + assert(NewPN->getNumIncomingValues() == PrologExitPreds.size() && + "expected from phi node definition!"); // Update the existing PHI node operand with the value from the // new PHI node. How this is done depends on if the existing @@ -134,9 +184,9 @@ } // Make sure that created prolog loop is in simplified form - SmallVector PrologExitPreds; Loop *PrologLoop = LI->getLoopFor(PrologLatch); if (PrologLoop) { + SmallVector PrologExitPreds; for (BasicBlock *PredBB : predecessors(PrologExit)) if (PrologLoop->contains(PredBB)) PrologExitPreds.push_back(PredBB); @@ -368,6 +418,13 @@ NewIdx->addIncoming(NewIter, InsertTop); NewIdx->addIncoming(IdxSub, NewBB); } + { + // Populate the VMap for the OrigLatchExit. + BranchInst *OrigLatchBR = cast(Latch->getTerminator()); + unsigned ExitIndex = OrigLatchBR->getSuccessor(0) == Header ? 1 : 0; + BasicBlock *OrigLatchExit = OrigLatchBR->getSuccessor(ExitIndex); + VMap[OrigLatchExit] = InsertBot; + } LatchBR->eraseFromParent(); } } @@ -429,7 +486,7 @@ static bool canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl &OtherExits, BasicBlock *LatchExit, bool PreserveLCSSA, - bool UseEpilogRemainder) { + bool UseEpilogRemainder, DominatorTree *DT) { // We currently have some correctness constrains in unrolling a multi-exit // loop. Check for these below. @@ -445,8 +502,10 @@ // TODO: Support multiple exiting blocks jumping to the `LatchExit` when // UnrollRuntimeMultiExit is true. 
This will need updating the logic in - // connectEpilog/connectProlog. - if (!LatchExit->getSinglePredecessor()) { + // connectEpilog. + // A prolog remainder supports multiple exiting blocks to the LatchExit, + // but only when a DominatorTree is available. + if (!LatchExit->getSinglePredecessor() && (UseEpilogRemainder || !DT)) { LLVM_DEBUG( dbgs() << "Bailout for multi-exit handling when latch exit has >1 " "predecessor.\n"); @@ -468,12 +527,12 @@ /// we return true only if UnrollRuntimeMultiExit is set to true. static bool canProfitablyUnrollMultiExitLoop( Loop *L, SmallVectorImpl &OtherExits, BasicBlock *LatchExit, - bool PreserveLCSSA, bool UseEpilogRemainder) { + bool PreserveLCSSA, bool UseEpilogRemainder, DominatorTree *DT) { #if !defined(NDEBUG) SmallVector OtherExitsDummyCheck; assert(canSafelyUnrollMultiExitLoop(L, OtherExitsDummyCheck, LatchExit, - PreserveLCSSA, UseEpilogRemainder) && + PreserveLCSSA, UseEpilogRemainder, DT) && "Should be safe to unroll before checking profitability!"); #endif @@ -599,9 +658,9 @@ SmallVector OtherExits; bool isMultiExitUnrollingEnabled = canSafelyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, - UseEpilogRemainder) && + UseEpilogRemainder, DT) && canProfitablyUnrollMultiExitLoop(L, OtherExits, LatchExit, PreserveLCSSA, - UseEpilogRemainder); + UseEpilogRemainder, DT); // Support only single exit and exiting block unless multi-exit loop unrolling is enabled. 
if (!isMultiExitUnrollingEnabled && (!L->getExitingBlock() || OtherExits.size())) { Index: test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -213,9 +213,133 @@ ; EPILOG-NOT: .unr ; EPILOG-NOT: .epil -; PROLOG: hdr_latch_same_exit( -; PROLOG-NOT: .unr -; PROLOG-NOT: .prol +; PROLOG-LABEL: @hdr_latch_same_exit( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; PROLOG-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_LOOPEXIT:%.*]], label [[HEADER_PROL_PREHEADER:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[SUM_02_PROL:%.*]] = phi i32 [ [[ADD_PROL:%.*]], [[LATCH_PROL]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ], [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: br i1 [[COND:%.*]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_EXITING_BLOCK_PROL:%.*]] +; PROLOG: for.exiting_block.prol: +; PROLOG-NEXT: [[CMP_PROL:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_PROL]], label [[FOR_EXIT2_LOOPEXIT1:%.*]], label [[LATCH_PROL]] +; PROLOG: latch.prol: +; PROLOG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_PROL]] +; PROLOG-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4 +; PROLOG-NEXT: [[ADD_PROL]] = add nsw i32 [[TMP1]], [[SUM_02_PROL]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_PROL]] = add i64 [[INDVARS_IV_PROL]], 1 +; PROLOG-NEXT: [[PROL_ITER_SUB]] = add i64 
[[PROL_ITER]], -1 +; PROLOG-NEXT: [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 +; PROLOG-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]], label [[HEADER_PROL]] +; PROLOG: header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ 0, [[HEADER_PROL]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ], [ [[INDVARS_IV_PROL]], [[HEADER_PROL]] ] +; PROLOG-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ [[SUM_02_PROL]], [[HEADER_PROL]] ] +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: [[RESULT_UNR:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[RESULT_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 7 +; PROLOG-NEXT: br i1 [[TMP2]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] +; PROLOG: entry.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; PROLOG-NEXT: [[SUM_02:%.*]] = phi i32 [ [[SUM_02_UNR]], [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; PROLOG: for.exiting_block: +; PROLOG-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[LATCH:%.*]] +; PROLOG: latch: +; PROLOG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; PROLOG-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; PROLOG-NEXT: [[ADD:%.*]] = add 
nsw i32 [[TMP3]], [[SUM_02]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i64 [[INDVARS_IV]], 1 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_1:%.*]] +; PROLOG: latchExit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_PH:%.*]] = phi i32 [ 0, [[HEADER]] ], [ 0, [[LATCH]] ], [ 0, [[LATCH_1:%.*]] ], [ 0, [[LATCH_2:%.*]] ], [ 0, [[LATCH_3:%.*]] ], [ 0, [[LATCH_4:%.*]] ], [ 0, [[LATCH_5:%.*]] ], [ 0, [[LATCH_6:%.*]] ], [ [[ADD_7]], [[LATCH_7]] ] +; PROLOG-NEXT: br label [[LATCHEXIT]] +; PROLOG: latchExit: +; PROLOG-NEXT: [[RESULT:%.*]] = phi i32 [ [[RESULT_UNR]], [[HEADER_PROL_LOOPEXIT]] ], [ [[RESULT_PH]], [[LATCHEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: ret i32 [[RESULT]] +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: br label [[FOR_EXIT2:%.*]] +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: br label [[FOR_EXIT2]] +; PROLOG: for.exit2: +; PROLOG-NEXT: ret i32 42 +; PROLOG: for.exiting_block.1: +; PROLOG-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_1]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_1]] +; PROLOG: latch.1: +; PROLOG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; PROLOG-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; PROLOG-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add i64 [[INDVARS_IV]], 2 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_2:%.*]] +; PROLOG: for.exiting_block.2: +; PROLOG-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_2]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_2]] +; PROLOG: latch.2: +; PROLOG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; PROLOG-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; PROLOG-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add i64 [[INDVARS_IV]], 3 +; PROLOG-NEXT: br 
i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_3:%.*]] +; PROLOG: for.exiting_block.3: +; PROLOG-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_3]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_3]] +; PROLOG: latch.3: +; PROLOG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; PROLOG-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; PROLOG-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add i64 [[INDVARS_IV]], 4 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_4:%.*]] +; PROLOG: for.exiting_block.4: +; PROLOG-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_4]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_4]] +; PROLOG: latch.4: +; PROLOG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; PROLOG-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; PROLOG-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add i64 [[INDVARS_IV]], 5 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_5:%.*]] +; PROLOG: for.exiting_block.5: +; PROLOG-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_5]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_5]] +; PROLOG: latch.5: +; PROLOG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; PROLOG-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; PROLOG-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add i64 [[INDVARS_IV]], 6 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_6:%.*]] +; PROLOG: for.exiting_block.6: +; PROLOG-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_6]], label [[FOR_EXIT2_LOOPEXIT]], 
label [[LATCH_6]] +; PROLOG: latch.6: +; PROLOG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; PROLOG-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; PROLOG-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add i64 [[INDVARS_IV]], 7 +; PROLOG-NEXT: br i1 [[COND]], label [[LATCHEXIT_UNR_LCSSA]], label [[FOR_EXITING_BLOCK_7:%.*]] +; PROLOG: for.exiting_block.7: +; PROLOG-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_7]], label [[FOR_EXIT2_LOOPEXIT]], label [[LATCH_7]] +; PROLOG: latch.7: +; PROLOG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; PROLOG-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; PROLOG-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; PROLOG-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[N]] +; PROLOG-NEXT: br i1 [[EXITCOND_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[HEADER]] +; entry: br label %header @@ -252,9 +376,133 @@ ; EPILOG-NOT: .unr ; EPILOG-NOT: .epil -; PROLOG: otherblock_latch_same_exit( -; PROLOG-NOT: .unr -; PROLOG-NOT: .prol +; PROLOG-LABEL: @otherblock_latch_same_exit( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; PROLOG-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_LOOPEXIT:%.*]], label [[HEADER_PROL_PREHEADER:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[SUM_02_PROL:%.*]] = phi i32 [ [[ADD_PROL:%.*]], [[LATCH_PROL]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[PROL_ITER:%.*]] = 
phi i64 [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ], [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: br i1 [[COND:%.*]], label [[FOR_EXIT2_LOOPEXIT1:%.*]], label [[FOR_EXITING_BLOCK_PROL:%.*]] +; PROLOG: for.exiting_block.prol: +; PROLOG-NEXT: [[CMP_PROL:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[LATCH_PROL]] +; PROLOG: latch.prol: +; PROLOG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_PROL]] +; PROLOG-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4 +; PROLOG-NEXT: [[ADD_PROL]] = add nsw i32 [[TMP1]], [[SUM_02_PROL]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_PROL]] = add i64 [[INDVARS_IV_PROL]], 1 +; PROLOG-NEXT: [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1 +; PROLOG-NEXT: [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 +; PROLOG-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]], label [[HEADER_PROL]] +; PROLOG: header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ 2, [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ], [ [[INDVARS_IV_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ [[SUM_02_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: [[RESULT_UNR:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[RESULT_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 7 +; PROLOG-NEXT: br i1 [[TMP2]], label [[LATCHEXIT:%.*]], 
label [[ENTRY_NEW:%.*]] +; PROLOG: entry.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; PROLOG-NEXT: [[SUM_02:%.*]] = phi i32 [ [[SUM_02_UNR]], [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; PROLOG: for.exiting_block: +; PROLOG-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[LATCH:%.*]] +; PROLOG: latch: +; PROLOG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; PROLOG-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; PROLOG-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i64 [[INDVARS_IV]], 1 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_1:%.*]] +; PROLOG: latchExit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_PH:%.*]] = phi i32 [ 2, [[FOR_EXITING_BLOCK]] ], [ 2, [[FOR_EXITING_BLOCK_1]] ], [ 2, [[FOR_EXITING_BLOCK_2:%.*]] ], [ 2, [[FOR_EXITING_BLOCK_3:%.*]] ], [ 2, [[FOR_EXITING_BLOCK_4:%.*]] ], [ 2, [[FOR_EXITING_BLOCK_5:%.*]] ], [ 2, [[FOR_EXITING_BLOCK_6:%.*]] ], [ 2, [[FOR_EXITING_BLOCK_7:%.*]] ], [ [[ADD_7]], [[LATCH_7]] ] +; PROLOG-NEXT: br label [[LATCHEXIT]] +; PROLOG: latchExit: +; PROLOG-NEXT: [[RESULT:%.*]] = phi i32 [ [[RESULT_UNR]], [[HEADER_PROL_LOOPEXIT]] ], [ [[RESULT_PH]], [[LATCHEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: ret i32 [[RESULT]] +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: br label [[FOR_EXIT2:%.*]] +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: br label [[FOR_EXIT2]] +; PROLOG: for.exit2: +; PROLOG-NEXT: ret i32 42 +; PROLOG: for.exiting_block.1: +; PROLOG-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_1]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_1:%.*]] +; PROLOG: 
latch.1: +; PROLOG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; PROLOG-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; PROLOG-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add i64 [[INDVARS_IV]], 2 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_2]] +; PROLOG: for.exiting_block.2: +; PROLOG-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_2]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_2:%.*]] +; PROLOG: latch.2: +; PROLOG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; PROLOG-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; PROLOG-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add i64 [[INDVARS_IV]], 3 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_3]] +; PROLOG: for.exiting_block.3: +; PROLOG-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_3]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_3:%.*]] +; PROLOG: latch.3: +; PROLOG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; PROLOG-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; PROLOG-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add i64 [[INDVARS_IV]], 4 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_4]] +; PROLOG: for.exiting_block.4: +; PROLOG-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_4]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_4:%.*]] +; PROLOG: latch.4: +; PROLOG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; PROLOG-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; PROLOG-NEXT: [[ADD_4:%.*]] = add nsw i32 
[[TMP7]], [[ADD_3]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add i64 [[INDVARS_IV]], 5 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_5]] +; PROLOG: for.exiting_block.5: +; PROLOG-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_5]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_5:%.*]] +; PROLOG: latch.5: +; PROLOG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; PROLOG-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; PROLOG-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add i64 [[INDVARS_IV]], 6 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_6]] +; PROLOG: for.exiting_block.6: +; PROLOG-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_6]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_6:%.*]] +; PROLOG: latch.6: +; PROLOG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; PROLOG-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; PROLOG-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add i64 [[INDVARS_IV]], 7 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_7]] +; PROLOG: for.exiting_block.7: +; PROLOG-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_7]] +; PROLOG: latch.7: +; PROLOG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; PROLOG-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; PROLOG-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; PROLOG-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[N]] +; PROLOG-NEXT: br i1 [[EXITCOND_7]], label [[LATCHEXIT_UNR_LCSSA]], 
label [[HEADER]] +; entry: br label %header @@ -286,15 +534,139 @@ ; Two exiting blocks to latch where the exiting blocks are Latch and a ; non-header ; Same as above test except the incoming value for latch Phi is from the header -; FIXME: We should be able to runtime unroll. +; FIXME: We should be able to runtime unroll for epilog. define i32 @otherblock_latch_same_exit2(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: otherblock_latch_same_exit2( ; EPILOG-NOT: .unr ; EPILOG-NOT: .epil -; PROLOG: otherblock_latch_same_exit2( -; PROLOG-NOT: .unr -; PROLOG-NOT: .prol +; PROLOG-LABEL: @otherblock_latch_same_exit2( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; PROLOG-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_LOOPEXIT:%.*]], label [[HEADER_PROL_PREHEADER:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[SUM_02_PROL:%.*]] = phi i32 [ [[ADD_PROL:%.*]], [[LATCH_PROL]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ], [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: br i1 [[COND:%.*]], label [[FOR_EXIT2_LOOPEXIT1:%.*]], label [[FOR_EXITING_BLOCK_PROL:%.*]] +; PROLOG: for.exiting_block.prol: +; PROLOG-NEXT: [[CMP_PROL:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[LATCH_PROL]] +; PROLOG: latch.prol: +; PROLOG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_PROL]] +; PROLOG-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4 +; PROLOG-NEXT: [[ADD_PROL]] = add nsw i32 [[TMP1]], [[SUM_02_PROL]] +; PROLOG-NEXT: 
[[INDVARS_IV_NEXT_PROL]] = add i64 [[INDVARS_IV_PROL]], 1 +; PROLOG-NEXT: [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1 +; PROLOG-NEXT: [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 +; PROLOG-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]], label [[HEADER_PROL]] +; PROLOG: header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ [[SUM_02_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ], [ [[INDVARS_IV_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ [[SUM_02_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: [[RESULT_UNR:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[RESULT_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 7 +; PROLOG-NEXT: br i1 [[TMP2]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] +; PROLOG: entry.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; PROLOG-NEXT: [[SUM_02:%.*]] = phi i32 [ [[SUM_02_UNR]], [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; PROLOG: for.exiting_block: +; PROLOG-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[LATCH:%.*]] +; PROLOG: latch: +; PROLOG-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; PROLOG-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; PROLOG-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i64 [[INDVARS_IV]], 1 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_1:%.*]] +; PROLOG: latchExit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], [[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[FOR_EXITING_BLOCK_3:%.*]] ], [ [[ADD_3:%.*]], [[FOR_EXITING_BLOCK_4:%.*]] ], [ [[ADD_4:%.*]], [[FOR_EXITING_BLOCK_5:%.*]] ], [ [[ADD_5:%.*]], [[FOR_EXITING_BLOCK_6:%.*]] ], [ [[ADD_6:%.*]], [[FOR_EXITING_BLOCK_7:%.*]] ], [ [[ADD_7]], [[LATCH_7]] ] +; PROLOG-NEXT: br label [[LATCHEXIT]] +; PROLOG: latchExit: +; PROLOG-NEXT: [[RESULT:%.*]] = phi i32 [ [[RESULT_UNR]], [[HEADER_PROL_LOOPEXIT]] ], [ [[RESULT_PH]], [[LATCHEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: ret i32 [[RESULT]] +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: br label [[FOR_EXIT2:%.*]] +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: br label [[FOR_EXIT2]] +; PROLOG: for.exit2: +; PROLOG-NEXT: ret i32 42 +; PROLOG: for.exiting_block.1: +; PROLOG-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_1]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_1:%.*]] +; PROLOG: latch.1: +; PROLOG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; PROLOG-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; PROLOG-NEXT: [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add i64 [[INDVARS_IV]], 2 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_2]] +; PROLOG: for.exiting_block.2: +; PROLOG-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_2]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_2:%.*]] +; 
PROLOG: latch.2: +; PROLOG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; PROLOG-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; PROLOG-NEXT: [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add i64 [[INDVARS_IV]], 3 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_3]] +; PROLOG: for.exiting_block.3: +; PROLOG-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_3]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_3:%.*]] +; PROLOG: latch.3: +; PROLOG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; PROLOG-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; PROLOG-NEXT: [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add i64 [[INDVARS_IV]], 4 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_4]] +; PROLOG: for.exiting_block.4: +; PROLOG-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_4]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_4:%.*]] +; PROLOG: latch.4: +; PROLOG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; PROLOG-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; PROLOG-NEXT: [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add i64 [[INDVARS_IV]], 5 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_5]] +; PROLOG: for.exiting_block.5: +; PROLOG-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_5]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_5:%.*]] +; PROLOG: latch.5: +; PROLOG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; PROLOG-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; PROLOG-NEXT: [[ADD_5]] = add nsw i32 
[[TMP8]], [[ADD_4]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add i64 [[INDVARS_IV]], 6 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_6]] +; PROLOG: for.exiting_block.6: +; PROLOG-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_6]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_6:%.*]] +; PROLOG: latch.6: +; PROLOG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; PROLOG-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; PROLOG-NEXT: [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add i64 [[INDVARS_IV]], 7 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_7]] +; PROLOG: for.exiting_block.7: +; PROLOG-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_7]] +; PROLOG: latch.7: +; PROLOG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; PROLOG-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; PROLOG-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; PROLOG-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[N]] +; PROLOG-NEXT: br i1 [[EXITCOND_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[HEADER]] +; entry: br label %header @@ -327,15 +699,138 @@ ; non-header ; Same as above test except the incoming value for cloned latch Phi is from the ; for.exiting_block. -; FIXME: We should be able to runtime unroll. +; FIXME: We should be able to runtime unroll for epilog. 
define i32 @otherblock_latch_same_exit3(i32* nocapture %a, i64 %n, i1 %cond) { ; EPILOG: otherblock_latch_same_exit3( ; EPILOG-NOT: .unr ; EPILOG-NOT: .epil -; PROLOG: otherblock_latch_same_exit3( -; PROLOG-NOT: .unr -; PROLOG-NOT: .prol +; PROLOG-LABEL: @otherblock_latch_same_exit3( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; PROLOG-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_LOOPEXIT:%.*]], label [[HEADER_PROL_PREHEADER:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[LATCH_PROL:%.*]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[SUM_02_PROL:%.*]] = phi i32 [ [[ADD_PROL:%.*]], [[LATCH_PROL]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[LATCH_PROL]] ], [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: br i1 [[COND:%.*]], label [[FOR_EXIT2_LOOPEXIT1:%.*]], label [[FOR_EXITING_BLOCK_PROL:%.*]] +; PROLOG: for.exiting_block.prol: +; PROLOG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_PROL]] +; PROLOG-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4 +; PROLOG-NEXT: [[ADD_PROL]] = add nsw i32 [[TMP1]], [[SUM_02_PROL]] +; PROLOG-NEXT: [[CMP_PROL:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[LATCH_PROL]] +; PROLOG: latch.prol: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_PROL]] = add i64 [[INDVARS_IV_PROL]], 1 +; PROLOG-NEXT: [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1 +; PROLOG-NEXT: [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 +; PROLOG-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]], label [[HEADER_PROL]] +; PROLOG: 
header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[LATCH_PROL]] ], [ [[SUM_02_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[LATCH_PROL]] ], [ [[INDVARS_IV_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: [[RESULT_UNR:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[RESULT_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD_PROL]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 7 +; PROLOG-NEXT: br i1 [[TMP2]], label [[LATCHEXIT:%.*]], label [[ENTRY_NEW:%.*]] +; PROLOG: entry.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[LATCH_7:%.*]] ] +; PROLOG-NEXT: [[SUM_02:%.*]] = phi i32 [ [[SUM_02_UNR]], [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[LATCH_7]] ] +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; PROLOG: for.exiting_block: +; PROLOG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; PROLOG-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; PROLOG-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]] +; PROLOG-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP]], label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[LATCH:%.*]] +; PROLOG: latch: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_1:%.*]] +; PROLOG: latchExit.unr-lcssa: +; PROLOG-NEXT: [[RESULT_PH:%.*]] = phi i32 [ [[SUM_02]], [[FOR_EXITING_BLOCK]] ], [ [[ADD]], [[FOR_EXITING_BLOCK_1]] ], [ [[ADD_1:%.*]], 
[[FOR_EXITING_BLOCK_2:%.*]] ], [ [[ADD_2:%.*]], [[FOR_EXITING_BLOCK_3:%.*]] ], [ [[ADD_3:%.*]], [[FOR_EXITING_BLOCK_4:%.*]] ], [ [[ADD_4:%.*]], [[FOR_EXITING_BLOCK_5:%.*]] ], [ [[ADD_5:%.*]], [[FOR_EXITING_BLOCK_6:%.*]] ], [ [[ADD_6:%.*]], [[FOR_EXITING_BLOCK_7:%.*]] ], [ [[ADD_7]], [[LATCH_7]] ] +; PROLOG-NEXT: br label [[LATCHEXIT]] +; PROLOG: latchExit: +; PROLOG-NEXT: [[RESULT:%.*]] = phi i32 [ [[RESULT_UNR]], [[HEADER_PROL_LOOPEXIT]] ], [ [[RESULT_PH]], [[LATCHEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: ret i32 [[RESULT]] +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: br label [[FOR_EXIT2:%.*]] +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: br label [[FOR_EXIT2]] +; PROLOG: for.exit2: +; PROLOG-NEXT: ret i32 42 +; PROLOG: for.exiting_block.1: +; PROLOG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i64 [[INDVARS_IV]], 1 +; PROLOG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; PROLOG-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; PROLOG-NEXT: [[ADD_1]] = add nsw i32 [[TMP4]], [[ADD]] +; PROLOG-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_1]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_1:%.*]] +; PROLOG: latch.1: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_2]] +; PROLOG: for.exiting_block.2: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add i64 [[INDVARS_IV]], 2 +; PROLOG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; PROLOG-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; PROLOG-NEXT: [[ADD_2]] = add nsw i32 [[TMP5]], [[ADD_1]] +; PROLOG-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_2]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_2:%.*]] +; PROLOG: latch.2: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_3]] +; PROLOG: for.exiting_block.3: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add i64 [[INDVARS_IV]], 3 +; 
PROLOG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; PROLOG-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; PROLOG-NEXT: [[ADD_3]] = add nsw i32 [[TMP6]], [[ADD_2]] +; PROLOG-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_3]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_3:%.*]] +; PROLOG: latch.3: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_4]] +; PROLOG: for.exiting_block.4: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add i64 [[INDVARS_IV]], 4 +; PROLOG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; PROLOG-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; PROLOG-NEXT: [[ADD_4]] = add nsw i32 [[TMP7]], [[ADD_3]] +; PROLOG-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_4]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_4:%.*]] +; PROLOG: latch.4: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_5]] +; PROLOG: for.exiting_block.5: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add i64 [[INDVARS_IV]], 5 +; PROLOG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; PROLOG-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; PROLOG-NEXT: [[ADD_5]] = add nsw i32 [[TMP8]], [[ADD_4]] +; PROLOG-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_5]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_5:%.*]] +; PROLOG: latch.5: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_6]] +; PROLOG: for.exiting_block.6: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add i64 [[INDVARS_IV]], 6 +; PROLOG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; PROLOG-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; PROLOG-NEXT: [[ADD_6]] = add nsw i32 [[TMP9]], [[ADD_5]] +; 
PROLOG-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_6]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_6:%.*]] +; PROLOG: latch.6: +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_7]] +; PROLOG: for.exiting_block.7: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add i64 [[INDVARS_IV]], 7 +; PROLOG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; PROLOG-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; PROLOG-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] +; PROLOG-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_7]] +; PROLOG: latch.7: +; PROLOG-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; PROLOG-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[N]] +; PROLOG-NEXT: br i1 [[EXITCOND_7]], label [[LATCHEXIT_UNR_LCSSA]], label [[HEADER]] +; entry: br label %header @@ -364,16 +859,216 @@ ret i32 42 } -; FIXME: Support multiple exiting blocks to the unique exit block (LatchExit). +; FIXME: Support multiple exiting blocks to the unique exit block (LatchExit) +; for epilog. ; Only 2 blocks in loop: header and latch where both exit to same LatchExit. 
+define i32 @hdr_latch_diff_exit(i32* nocapture %a, i64 %n, i1 %cond) { +; EPILOG: hdr_latch_diff_exit( +; EPILOG-NOT: .unr +; EPILOG-NOT: .epil + +; PROLOG-LABEL: @hdr_latch_diff_exit( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1 +; PROLOG-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 7 +; PROLOG-NEXT: [[LCMP_MOD:%.*]] = icmp eq i64 [[XTRAITER]], 0 +; PROLOG-NEXT: br i1 [[LCMP_MOD]], label [[HEADER_PROL_LOOPEXIT:%.*]], label [[HEADER_PROL_PREHEADER:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL:%.*]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[SUM_02_PROL:%.*]] = phi i32 [ [[ADD_PROL:%.*]], [[FOR_BODY_PROL]] ], [ 0, [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ], [ [[XTRAITER]], [[HEADER_PROL_PREHEADER]] ] +; PROLOG-NEXT: br i1 [[COND:%.*]], label [[FOR_EXIT2_LOOPEXIT1:%.*]], label [[FOR_EXITING_BLOCK_PROL:%.*]] +; PROLOG: for.exiting_block.prol: +; PROLOG-NEXT: [[CMP_PROL:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PROL]] +; PROLOG: for.body.prol: +; PROLOG-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV_PROL]] +; PROLOG-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX_PROL]], align 4 +; PROLOG-NEXT: [[ADD_PROL]] = add nsw i32 [[TMP1]], [[SUM_02_PROL]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_PROL]] = add i64 [[INDVARS_IV_PROL]], 1 +; PROLOG-NEXT: [[PROL_ITER_SUB]] = add i64 [[PROL_ITER]], -1 +; PROLOG-NEXT: [[PROL_ITER_CMP:%.*]] = icmp eq i64 [[PROL_ITER_SUB]], 0 +; PROLOG-NEXT: br i1 [[PROL_ITER_CMP]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]], label [[HEADER_PROL]] +; PROLOG: header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: [[SUM_0_LCSSA_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]],
[[FOR_BODY_PROL]] ], [ 0, [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: [[SUM_02_UNR_PH:%.*]] = phi i32 [ [[ADD_PROL]], [[FOR_BODY_PROL]] ], [ [[SUM_02_PROL]], [[FOR_EXITING_BLOCK_PROL]] ] +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: [[SUM_0_LCSSA_UNR:%.*]] = phi i32 [ undef, [[ENTRY:%.*]] ], [ [[SUM_0_LCSSA_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[SUM_02_UNR:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[SUM_02_UNR_PH]], [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] ] +; PROLOG-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP0]], 7 +; PROLOG-NEXT: br i1 [[TMP2]], label [[FOR_END:%.*]], label [[ENTRY_NEW:%.*]] +; PROLOG: entry.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[ENTRY_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY_7:%.*]] ] +; PROLOG-NEXT: [[SUM_02:%.*]] = phi i32 [ [[SUM_02_UNR]], [[ENTRY_NEW]] ], [ [[ADD_7:%.*]], [[FOR_BODY_7]] ] +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT:%.*]], label [[FOR_EXITING_BLOCK:%.*]] +; PROLOG: for.exiting_block: +; PROLOG-NEXT: [[CMP:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP]], label [[FOR_END_UNR_LCSSA:%.*]], label [[FOR_BODY:%.*]] +; PROLOG: for.body: +; PROLOG-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] +; PROLOG-NEXT: [[TMP3:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; PROLOG-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT:%.*]] = add i64 [[INDVARS_IV]], 1 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_1:%.*]] +; PROLOG: for.end.unr-lcssa: +; PROLOG-NEXT:
[[SUM_0_LCSSA_PH:%.*]] = phi i32 [ 0, [[FOR_EXITING_BLOCK]] ], [ 0, [[FOR_EXITING_BLOCK_1]] ], [ 0, [[FOR_EXITING_BLOCK_2:%.*]] ], [ 0, [[FOR_EXITING_BLOCK_3:%.*]] ], [ 0, [[FOR_EXITING_BLOCK_4:%.*]] ], [ 0, [[FOR_EXITING_BLOCK_5:%.*]] ], [ 0, [[FOR_EXITING_BLOCK_6:%.*]] ], [ 0, [[FOR_EXITING_BLOCK_7:%.*]] ], [ [[ADD_7]], [[FOR_BODY_7]] ] +; PROLOG-NEXT: br label [[FOR_END]] +; PROLOG: for.end: +; PROLOG-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ [[SUM_0_LCSSA_UNR]], [[HEADER_PROL_LOOPEXIT]] ], [ [[SUM_0_LCSSA_PH]], [[FOR_END_UNR_LCSSA]] ] +; PROLOG-NEXT: ret i32 [[SUM_0_LCSSA]] +; PROLOG: for.exit2.loopexit: +; PROLOG-NEXT: br label [[FOR_EXIT2:%.*]] +; PROLOG: for.exit2.loopexit1: +; PROLOG-NEXT: br label [[FOR_EXIT2]] +; PROLOG: for.exit2: +; PROLOG-NEXT: ret i32 42 +; PROLOG: for.exiting_block.1: +; PROLOG-NEXT: [[CMP_1:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_1]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_1:%.*]] +; PROLOG: for.body.1: +; PROLOG-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]] +; PROLOG-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARRAYIDX_1]], align 4 +; PROLOG-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add i64 [[INDVARS_IV]], 2 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_2]] +; PROLOG: for.exiting_block.2: +; PROLOG-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_2]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_2:%.*]] +; PROLOG: for.body.2: +; PROLOG-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_1]] +; PROLOG-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARRAYIDX_2]], align 4 +; PROLOG-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add i64 [[INDVARS_IV]], 3 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_3]] +; PROLOG: for.exiting_block.3: +;
PROLOG-NEXT: [[CMP_3:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_3]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_3:%.*]] +; PROLOG: for.body.3: +; PROLOG-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_2]] +; PROLOG-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARRAYIDX_3]], align 4 +; PROLOG-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add i64 [[INDVARS_IV]], 4 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_4]] +; PROLOG: for.exiting_block.4: +; PROLOG-NEXT: [[CMP_4:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_4]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_4:%.*]] +; PROLOG: for.body.4: +; PROLOG-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_3]] +; PROLOG-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX_4]], align 4 +; PROLOG-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add i64 [[INDVARS_IV]], 5 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_5]] +; PROLOG: for.exiting_block.5: +; PROLOG-NEXT: [[CMP_5:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_5]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_5:%.*]] +; PROLOG: for.body.5: +; PROLOG-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_4]] +; PROLOG-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX_5]], align 4 +; PROLOG-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add i64 [[INDVARS_IV]], 6 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_6]] +; PROLOG: for.exiting_block.6: +; PROLOG-NEXT: [[CMP_6:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_6]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_6:%.*]] +; PROLOG: for.body.6: +; PROLOG-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr
inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_5]] +; PROLOG-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX_6]], align 4 +; PROLOG-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add i64 [[INDVARS_IV]], 7 +; PROLOG-NEXT: br i1 [[COND]], label [[FOR_EXIT2_LOOPEXIT]], label [[FOR_EXITING_BLOCK_7]] +; PROLOG: for.exiting_block.7: +; PROLOG-NEXT: [[CMP_7:%.*]] = icmp eq i64 [[N]], 42 +; PROLOG-NEXT: br i1 [[CMP_7]], label [[FOR_END_UNR_LCSSA]], label [[FOR_BODY_7]] +; PROLOG: for.body.7: +; PROLOG-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT_6]] +; PROLOG-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX_7]], align 4 +; PROLOG-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]] +; PROLOG-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8 +; PROLOG-NEXT: [[EXITCOND_7:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_7]], [[N]] +; PROLOG-NEXT: br i1 [[EXITCOND_7]], label [[FOR_END_UNR_LCSSA]], label [[HEADER]] +; +entry: + br label %header + +header: + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] + %sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ] + br i1 %cond, label %for.exit2, label %for.exiting_block + +for.exiting_block: + %cmp = icmp eq i64 %n, 42 + br i1 %cmp, label %for.end, label %for.body + +for.body: ; preds = %for.exiting_block + %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %sum.02 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond, label %for.end, label %header + +for.end: ; preds = %for.exiting_block, %for.body + %sum.0.lcssa = phi i32 [ 0, %for.exiting_block ], [ %add, %for.body ] + ret i32 %sum.0.lcssa + +for.exit2: + ret i32 42 +} +; FIXME: Support multiple exiting blocks to the unique exit block for epilog.
define void @unique_exit(i32 %arg) { ; EPILOG: unique_exit( ; EPILOG-NOT: .unr ; EPILOG-NOT: .epil -; PROLOG: unique_exit( -; PROLOG-NOT: .unr -; PROLOG-NOT: .prol +; PROLOG-LABEL: @unique_exit( +; PROLOG-NEXT: entry: +; PROLOG-NEXT: br i1 false, label [[PREHEADER:%.*]], label [[RETURNBLOCK:%.*]] +; PROLOG: preheader: +; PROLOG-NEXT: br i1 false, label [[HEADER_PROL_PREHEADER:%.*]], label [[HEADER_PROL_LOOPEXIT:%.*]] +; PROLOG: header.prol.preheader: +; PROLOG-NEXT: br label [[HEADER_PROL:%.*]] +; PROLOG: header.prol: +; PROLOG-NEXT: br i1 true, label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA:%.*]], label [[LATCH_PROL:%.*]] +; PROLOG: latch.prol: +; PROLOG-NEXT: br i1 undef, label [[HEADER_PROL]], label [[HEADER_PROL_LOOPEXIT_UNR_LCSSA]] +; PROLOG: header.prol.loopexit.unr-lcssa: +; PROLOG-NEXT: br label [[HEADER_PROL_LOOPEXIT]] +; PROLOG: header.prol.loopexit: +; PROLOG-NEXT: br i1 false, label [[LATCHEXIT:%.*]], label [[PREHEADER_NEW:%.*]] +; PROLOG: preheader.new: +; PROLOG-NEXT: br label [[HEADER:%.*]] +; PROLOG: header: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA:%.*]], label [[LATCH:%.*]] +; PROLOG: latch: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_1:%.*]] +; PROLOG: latchExit.unr-lcssa: +; PROLOG-NEXT: br label [[LATCHEXIT]] +; PROLOG: latchExit: +; PROLOG-NEXT: br label [[RETURNBLOCK]] +; PROLOG: returnblock: +; PROLOG-NEXT: ret void +; PROLOG: latch.1: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_2:%.*]] +; PROLOG: latch.2: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_3:%.*]] +; PROLOG: latch.3: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_4:%.*]] +; PROLOG: latch.4: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_5:%.*]] +; PROLOG: latch.5: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label [[LATCH_6:%.*]] +; PROLOG: latch.6: +; PROLOG-NEXT: br i1 true, label [[LATCHEXIT_UNR_LCSSA]], label 
[[LATCH_7:%.*]] +; PROLOG: latch.7: +; PROLOG-NEXT: br i1 false, label [[HEADER]], label [[LATCHEXIT_UNR_LCSSA]] +; entry: %tmp = icmp sgt i32 undef, %arg br i1 %tmp, label %preheader, label %returnblock