Index: llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" @@ -35,6 +36,7 @@ #include "llvm/Transforms/Utils.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/UnrollLoop.h" @@ -299,17 +301,15 @@ PreserveLCSSA); } -/// Create a clone of the blocks in a loop and connect them together. -/// If CreateRemainderLoop is false, loop structure will not be cloned, -/// otherwise a new loop will be created including all cloned blocks, and the -/// iterator of it switches to count NewIter down to 0. +/// Create a clone of the blocks in a loop and connect them together. A new +/// loop will be created including all cloned blocks, and the iterator of the +/// new loop switched to count NewIter down to 0. /// The cloned blocks should be inserted between InsertTop and InsertBot. -/// If loop structure is cloned InsertTop should be new preheader, InsertBot -/// new loop exit. -/// Return the new cloned loop that is created when CreateRemainderLoop is true. +/// InsertTop should be new preheader, InsertBot new loop exit. +/// Returns the new cloned loop that is created. static Loop * -CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, - const bool UseEpilogRemainder, const bool UnrollRemainder, +CloneLoopBlocks(Loop *L, Value *NewIter, const bool UseEpilogRemainder, + const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector &NewBlocks, LoopBlocksDFS &LoopBlocks, @@ -323,8 +323,6 @@ Loop *ParentLoop = L->getParentLoop(); NewLoopsMap NewLoops; NewLoops[ParentLoop] = ParentLoop; - if (!CreateRemainderLoop) - NewLoops[L] = ParentLoop; // For each block in the original loop, create a new copy, // and update the value map with the newly created values. @@ -332,11 +330,7 @@ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); NewBlocks.push_back(NewBB); - // If we're unrolling the outermost loop, there's no remainder loop, - // and this block isn't in a nested loop, then the new block is not - // in any loop. Otherwise, add it to loopinfo. - if (CreateRemainderLoop || LI->getLoopFor(*BB) != L || ParentLoop) - addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); + addClonedBlockToLoopInfo(*BB, NewBB, LI, NewLoops); VMap[*BB] = NewBB; if (Header == *BB) { @@ -357,27 +351,22 @@ } if (Latch == *BB) { - // For the last block, if CreateRemainderLoop is false, create a direct - // jump to InsertBot. If not, create a loop back to cloned head. + // For the last block, create a loop back to cloned head. VMap.erase((*BB)->getTerminator()); BasicBlock *FirstLoopBB = cast(VMap[Header]); BranchInst *LatchBR = cast(NewBB->getTerminator()); IRBuilder<> Builder(LatchBR); - if (!CreateRemainderLoop) { - Builder.CreateBr(InsertBot); - } else { - PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, - suffix + ".iter", - FirstLoopBB->getFirstNonPHI()); - Value *IdxSub = - Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), - NewIdx->getName() + ".sub"); - Value *IdxCmp = - Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); - Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); - NewIdx->addIncoming(NewIter, InsertTop); - NewIdx->addIncoming(IdxSub, NewBB); - } + PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, + suffix + ".iter", + FirstLoopBB->getFirstNonPHI()); + Value *IdxSub = + Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), + NewIdx->getName() + ".sub"); + Value *IdxCmp = + Builder.CreateIsNotNull(IdxSub, NewIdx->getName() + ".cmp"); + Builder.CreateCondBr(IdxCmp, FirstLoopBB, InsertBot); + NewIdx->addIncoming(NewIter, InsertTop); + NewIdx->addIncoming(IdxSub, NewBB); LatchBR->eraseFromParent(); } } @@ -386,28 +375,15 @@ // cloned loop. for (BasicBlock::iterator I = Header->begin(); isa(I); ++I) { PHINode *NewPHI = cast(VMap[&*I]); - if (!CreateRemainderLoop) { - if (UseEpilogRemainder) { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); - NewPHI->removeIncomingValue(Latch, false); - } else { - VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); - cast(VMap[Header])->getInstList().erase(NewPHI); - } - } else { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); - BasicBlock *NewLatch = cast(VMap[Latch]); - idx = NewPHI->getBasicBlockIndex(Latch); - Value *InVal = NewPHI->getIncomingValue(idx); - NewPHI->setIncomingBlock(idx, NewLatch); - if (Value *V = VMap.lookup(InVal)) - NewPHI->setIncomingValue(idx, V); - } + unsigned idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(idx, InsertTop); + BasicBlock *NewLatch = cast(VMap[Latch]); + idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(idx); + NewPHI->setIncomingBlock(idx, NewLatch); + if (Value *V = VMap.lookup(InVal)) + NewPHI->setIncomingValue(idx, V); } - if (!CreateRemainderLoop) - return nullptr; Loop *NewLoop = NewLoops[L]; assert(NewLoop && "L should have been cloned"); @@ -819,18 +795,13 @@ std::vector NewBlocks; ValueToValueMapTy VMap; - // For unroll factor 2 remainder loop will have 1 iterations. - // Do not create 1 iteration loop. - bool CreateRemainderLoop = (Count != 2); - // Clone all the basic blocks in the loop. If Count is 2, we don't clone // the loop, otherwise we create a cloned loop to execute the extra // iterations. This function adds the appropriate CFG connections. BasicBlock *InsertBot = UseEpilogRemainder ? LatchExit : PrologExit; BasicBlock *InsertTop = UseEpilogRemainder ? EpilogPreHeader : PrologPreHeader; Loop *remainderLoop = CloneLoopBlocks( - L, ModVal, CreateRemainderLoop, UseEpilogRemainder, UnrollRemainder, - InsertTop, InsertBot, + L, ModVal, UseEpilogRemainder, UnrollRemainder, InsertTop, InsertBot, NewPreHeader, NewBlocks, LoopBlocks, VMap, DT, LI); // Assign the maximum possible trip count as the back edge weight for the @@ -974,6 +945,41 @@ assert(DT->verify(DominatorTree::VerificationLevel::Full)); #endif + // For unroll factor 2 remainder loop will have 1 iteration. + if (Count == 2 && DT && LI && SE) { + // TODO: This code could probably be pulled out into a helper function + // (e.g. breakLoopBackedgeAndSimplify) and reused in loop-deletion. + BasicBlock *RemainderLatch = remainderLoop->getLoopLatch(); + SmallVector RemainderBlocks(remainderLoop->getBlocks().begin(), + remainderLoop->getBlocks().end()); + breakLoopBackedge(remainderLoop, *DT, *SE, *LI, nullptr); + remainderLoop = nullptr; + + // Simplify loop values after breaking the backedge + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + SmallVector DeadInsts; + for (BasicBlock *BB : RemainderBlocks) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { + Instruction *Inst = &*I++; + if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) + if (LI->replacementPreservesLCSSAForm(Inst, V)) + Inst->replaceAllUsesWith(V); + if (isInstructionTriviallyDead(Inst)) + DeadInsts.emplace_back(Inst); + } + // We can't do recursive deletion until we're done iterating, as we might + // have a phi which (potentially indirectly) uses instructions later in + // the block we're iterating through. + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); + } + + // Merge latch into exit block. + auto *ExitBB = RemainderLatch->getSingleSuccessor(); + assert(ExitBB && "required after breaking cond br backedge"); + DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); + MergeBlockIntoPredecessor(ExitBB, &DTU, LI); + } + // Canonicalize to LoopSimplifyForm both original and remainder loops. We // cannot rely on the LoopUnrollPass to do this because it only does // canonicalization for parent/subloops and not the sibling loops. Index: llvm/lib/Transforms/Utils/LoopUtils.cpp =================================================================== --- llvm/lib/Transforms/Utils/LoopUtils.cpp +++ llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -740,7 +740,7 @@ IRBuilder<> Builder(BI); auto *NewBI = Builder.CreateBr(ExitBB); // Transfer the metadata to the new branch instruction. - NewBI->copyMetadata(*BI, {LLVMContext::MD_loop, LLVMContext::MD_dbg, + NewBI->copyMetadata(*BI, {LLVMContext::MD_dbg, LLVMContext::MD_annotation}); BI->eraseFromParent(); Index: llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll +++ llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors-inseltpoison.ll @@ -22,11 +22,11 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]] +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] @@ -61,44 +61,37 @@ ; CHECK: middle.block.unr-lcssa: ; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]] ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> , [[VEC_IND12_EPIL]] +; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> , <16 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1 -; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]] -; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]] -; CHECK: middle.block.epilog-lcssa: ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7 -; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7 +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]] ; CHECK: for.body.prol.preheader: ; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] ; CHECK: for.body.prol: ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ] -; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 -; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 +; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]] ; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]] ; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0 ; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49 @@ -108,76 +101,76 @@ ; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.body.prol.loopexit.unr-lcssa: ; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] ; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]] ; CHECK: for.body.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7 -; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7 +; CHECK-NEXT: br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; CHECK: for.body.preheader.new: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]] +; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP27]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]] ; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]] ; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0 ; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]] +; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 +; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]] ; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]] ; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0 ; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 -; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]] +; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 +; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]] ; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]] ; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0 ; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]] +; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 +; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]] ; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]] ; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0 ; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]] +; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 +; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]] ; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]] ; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0 ; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 +; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]] ; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]] ; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0 ; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]] +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 +; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]] ; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]] ; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0 ; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49 Index: llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll +++ llvm/test/Transforms/LoopUnroll/PowerPC/p8-unrolling-legalize-vectors.ll @@ -22,11 +22,11 @@ ; CHECK-NEXT: [[TMP0:%.*]] = add nsw i64 [[N_VEC]], -16 ; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 4 ; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP2]], 1 +; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP2]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP1]], 1 ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK_UNR_LCSSA:%.*]], label [[VECTOR_PH_NEW:%.*]] ; CHECK: vector.ph.new: -; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER1]] +; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[TMP2]], [[XTRAITER]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH_NEW]] ], [ [[INDEX_NEXT_1:%.*]], [[VECTOR_BODY]] ] @@ -61,44 +61,37 @@ ; CHECK: middle.block.unr-lcssa: ; CHECK-NEXT: [[INDEX_UNR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] ; CHECK-NEXT: [[VEC_IND12_UNR:%.*]] = phi <16 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND12_UNR_PH]], [[MIDDLE_BLOCK_UNR_LCSSA_LOOPEXIT]] ] -; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]] +; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[VECTOR_BODY_EPIL_PREHEADER:%.*]], label [[MIDDLE_BLOCK:%.*]] ; CHECK: vector.body.epil.preheader: ; CHECK-NEXT: br label [[VECTOR_BODY_EPIL:%.*]] ; CHECK: vector.body.epil: -; CHECK-NEXT: [[INDEX_EPIL:%.*]] = phi i64 [ [[INDEX_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[VEC_IND12_EPIL:%.*]] = phi <16 x i32> [ [[VEC_IND12_UNR]], [[VECTOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> , [[VEC_IND12_EPIL]] +; CHECK-NEXT: [[TMP16:%.*]] = shl <16 x i32> , [[VEC_IND12_UNR]] ; CHECK-NEXT: [[TMP17:%.*]] = and <16 x i32> [[TMP16]], [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq <16 x i32> [[TMP17]], zeroinitializer ; CHECK-NEXT: [[TMP19:%.*]] = select <16 x i1> [[TMP18]], <16 x i8> , <16 x i8> -; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_EPIL]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDEX_UNR]] ; CHECK-NEXT: [[TMP21:%.*]] = bitcast i8* [[TMP20]] to <16 x i8>* ; CHECK-NEXT: store <16 x i8> [[TMP19]], <16 x i8>* [[TMP21]], align 1 -; CHECK-NEXT: [[INDEX_NEXT_EPIL:%.*]] = add i64 [[INDEX_EPIL]], 16 -; CHECK-NEXT: [[VEC_IND_NEXT13_EPIL:%.*]] = add <16 x i32> [[VEC_IND12_EPIL]], -; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT_EPIL]], [[N_VEC]] -; CHECK-NEXT: br label [[MIDDLE_BLOCK_EPILOG_LCSSA:%.*]] -; CHECK: middle.block.epilog-lcssa: ; CHECK-NEXT: br label [[MIDDLE_BLOCK]] ; CHECK: middle.block: ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY_PREHEADER]] ; CHECK: for.body.preheader: ; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[TMP23:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 -; CHECK-NEXT: [[TMP25:%.*]] = sub i64 [[TMP24]], [[INDVARS_IV_PH]] -; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[TMP23]], 7 -; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0 -; CHECK-NEXT: br i1 [[LCMP_MOD]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]] +; CHECK-NEXT: [[TMP22:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[WIDE_TRIP_COUNT]], -1 +; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[TMP23]], [[INDVARS_IV_PH]] +; CHECK-NEXT: [[XTRAITER1:%.*]] = and i64 [[TMP22]], 7 +; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER1]], 0 +; CHECK-NEXT: br i1 [[LCMP_MOD2]], label [[FOR_BODY_PROL_PREHEADER:%.*]], label [[FOR_BODY_PROL_LOOPEXIT:%.*]] ; CHECK: for.body.prol.preheader: ; CHECK-NEXT: br label [[FOR_BODY_PROL:%.*]] ; CHECK: for.body.prol: ; CHECK-NEXT: [[INDVARS_IV_PROL:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL:%.*]], [[FOR_BODY_PROL]] ], [ [[INDVARS_IV_PH]], [[FOR_BODY_PROL_PREHEADER]] ] -; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ] -; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 -; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP26]] +; CHECK-NEXT: [[PROL_ITER:%.*]] = phi i64 [ [[XTRAITER1]], [[FOR_BODY_PROL_PREHEADER]] ], [ [[PROL_ITER_SUB:%.*]], [[FOR_BODY_PROL]] ] +; CHECK-NEXT: [[TMP25:%.*]] = trunc i64 [[INDVARS_IV_PROL]] to i32 +; CHECK-NEXT: [[SHL_PROL:%.*]] = shl i32 1, [[TMP25]] ; CHECK-NEXT: [[AND_PROL:%.*]] = and i32 [[SHL_PROL]], [[X]] ; CHECK-NEXT: [[TOBOOL_PROL:%.*]] = icmp eq i32 [[AND_PROL]], 0 ; CHECK-NEXT: [[CONV_PROL:%.*]] = select i1 [[TOBOOL_PROL]], i8 48, i8 49 @@ -108,76 +101,76 @@ ; CHECK-NEXT: [[EXITCOND_PROL:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_PROL]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: [[PROL_ITER_SUB]] = sub i64 [[PROL_ITER]], 1 ; CHECK-NEXT: [[PROL_ITER_CMP:%.*]] = icmp ne i64 [[PROL_ITER_SUB]], 0 -; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop !0 +; CHECK-NEXT: br i1 [[PROL_ITER_CMP]], label [[FOR_BODY_PROL]], label [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: for.body.prol.loopexit.unr-lcssa: ; CHECK-NEXT: [[INDVARS_IV_UNR_PH:%.*]] = phi i64 [ [[INDVARS_IV_NEXT_PROL]], [[FOR_BODY_PROL]] ] ; CHECK-NEXT: br label [[FOR_BODY_PROL_LOOPEXIT]] ; CHECK: for.body.prol.loopexit: ; CHECK-NEXT: [[INDVARS_IV_UNR:%.*]] = phi i64 [ [[INDVARS_IV_PH]], [[FOR_BODY_PREHEADER]] ], [ [[INDVARS_IV_UNR_PH]], [[FOR_BODY_PROL_LOOPEXIT_UNR_LCSSA]] ] -; CHECK-NEXT: [[TMP27:%.*]] = icmp ult i64 [[TMP25]], 7 -; CHECK-NEXT: br i1 [[TMP27]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] +; CHECK-NEXT: [[TMP26:%.*]] = icmp ult i64 [[TMP24]], 7 +; CHECK-NEXT: br i1 [[TMP26]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]] ; CHECK: for.body.preheader.new: ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_UNR]], [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ] -; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP28]] +; CHECK-NEXT: [[TMP27:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[TMP27]] ; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], [[X]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[AND]], 0 ; CHECK-NEXT: [[CONV:%.*]] = select i1 [[TOBOOL]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV]] ; CHECK-NEXT: store i8 [[CONV]], i8* [[ARRAYIDX]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP29]] +; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 +; CHECK-NEXT: [[SHL_1:%.*]] = shl i32 1, [[TMP28]] ; CHECK-NEXT: [[AND_1:%.*]] = and i32 [[SHL_1]], [[X]] ; CHECK-NEXT: [[TOBOOL_1:%.*]] = icmp eq i32 [[AND_1]], 0 ; CHECK-NEXT: [[CONV_1:%.*]] = select i1 [[TOBOOL_1]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT]] ; CHECK-NEXT: store i8 [[CONV_1]], i8* [[ARRAYIDX_1]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT]], 1 -; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 -; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP30]] +; CHECK-NEXT: [[TMP29:%.*]] = trunc i64 [[INDVARS_IV_NEXT_1]] to i32 +; CHECK-NEXT: [[SHL_2:%.*]] = shl i32 1, [[TMP29]] ; CHECK-NEXT: [[AND_2:%.*]] = and i32 [[SHL_2]], [[X]] ; CHECK-NEXT: [[TOBOOL_2:%.*]] = icmp eq i32 [[AND_2]], 0 ; CHECK-NEXT: [[CONV_2:%.*]] = select i1 [[TOBOOL_2]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_1]] ; CHECK-NEXT: store i8 [[CONV_2]], i8* [[ARRAYIDX_2]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_1]], 1 -; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 -; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP31]] +; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[INDVARS_IV_NEXT_2]] to i32 +; CHECK-NEXT: [[SHL_3:%.*]] = shl i32 1, [[TMP30]] ; CHECK-NEXT: [[AND_3:%.*]] = and i32 [[SHL_3]], [[X]] ; CHECK-NEXT: [[TOBOOL_3:%.*]] = icmp eq i32 [[AND_3]], 0 ; CHECK-NEXT: [[CONV_3:%.*]] = select i1 [[TOBOOL_3]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_2]] ; CHECK-NEXT: store i8 [[CONV_3]], i8* [[ARRAYIDX_3]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_2]], 1 -; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 -; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP32]] +; CHECK-NEXT: [[TMP31:%.*]] = trunc i64 [[INDVARS_IV_NEXT_3]] to i32 +; CHECK-NEXT: [[SHL_4:%.*]] = shl i32 1, [[TMP31]] ; CHECK-NEXT: [[AND_4:%.*]] = and i32 [[SHL_4]], [[X]] ; CHECK-NEXT: [[TOBOOL_4:%.*]] = icmp eq i32 [[AND_4]], 0 ; CHECK-NEXT: [[CONV_4:%.*]] = select i1 [[TOBOOL_4]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_3]] ; CHECK-NEXT: store i8 [[CONV_4]], i8* [[ARRAYIDX_4]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_3]], 1 -; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 -; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP33]] +; CHECK-NEXT: [[TMP32:%.*]] = trunc i64 [[INDVARS_IV_NEXT_4]] to i32 +; CHECK-NEXT: [[SHL_5:%.*]] = shl i32 1, [[TMP32]] ; CHECK-NEXT: [[AND_5:%.*]] = and i32 [[SHL_5]], [[X]] ; CHECK-NEXT: [[TOBOOL_5:%.*]] = icmp eq i32 [[AND_5]], 0 ; CHECK-NEXT: [[CONV_5:%.*]] = select i1 [[TOBOOL_5]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_4]] ; CHECK-NEXT: store i8 [[CONV_5]], i8* [[ARRAYIDX_5]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_4]], 1 -; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 -; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP34]] +; CHECK-NEXT: [[TMP33:%.*]] = trunc i64 [[INDVARS_IV_NEXT_5]] to i32 +; CHECK-NEXT: [[SHL_6:%.*]] = shl i32 1, [[TMP33]] ; CHECK-NEXT: [[AND_6:%.*]] = and i32 [[SHL_6]], [[X]] ; CHECK-NEXT: [[TOBOOL_6:%.*]] = icmp eq i32 [[AND_6]], 0 ; CHECK-NEXT: [[CONV_6:%.*]] = select i1 [[TOBOOL_6]], i8 48, i8 49 ; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i8, i8* [[S]], i64 [[INDVARS_IV_NEXT_5]] ; CHECK-NEXT: store i8 [[CONV_6]], i8* [[ARRAYIDX_6]], align 1 ; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = add nuw nsw i64 [[INDVARS_IV_NEXT_5]], 1 -; CHECK-NEXT: [[TMP35:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 -; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP35]] +; CHECK-NEXT: [[TMP34:%.*]] = trunc i64 [[INDVARS_IV_NEXT_6]] to i32 +; CHECK-NEXT: [[SHL_7:%.*]] = shl i32 1, [[TMP34]] ; CHECK-NEXT: [[AND_7:%.*]] = and i32 [[SHL_7]], [[X]] ; CHECK-NEXT: [[TOBOOL_7:%.*]] = icmp eq i32 [[AND_7]], 0 ; CHECK-NEXT: [[CONV_7:%.*]] = select i1 [[TOBOOL_7]], i8 48, i8 49 Index: llvm/test/Transforms/LoopUnroll/revisit.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/revisit.ll +++ llvm/test/Transforms/LoopUnroll/revisit.ll @@ -140,10 +140,10 @@ ; ; Revisit the children of the outer loop that are part of the epilogue. ; -; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
-; CHECK-NOT: LoopFullUnrollPass ; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.1.epil
; CHECK-NOT: LoopFullUnrollPass +; CHECK: LoopFullUnrollPass on Loop at depth 2 containing: %l0.0.0.epil
+; CHECK-NOT: LoopFullUnrollPass l0.latch: br label %l0 ; CHECK: LoopFullUnrollPass on Loop at depth 1 containing: %l0
Index: llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop-multiple-exits.ll @@ -157,26 +157,19 @@ ; EPILOG-BLOCK: exit1: ; EPILOG-BLOCK-NEXT: ret void ; EPILOG-BLOCK: exit2.loopexit.unr-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.unr-lcssa ; EPILOG-BLOCK: exit2.loopexit.unr-lcssa: -; EPILOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 ; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit ; EPILOG-BLOCK: loop_header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %loop_header.epil ; EPILOG-BLOCK: loop_header.epil: -; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.epil, label %loop_exiting_bb1.epil ; EPILOG-BLOCK: loop_exiting_bb1.epil: ; EPILOG-BLOCK-NEXT: br i1 false, label %loop_exiting_bb2.epil, label %exit1 ; EPILOG-BLOCK: loop_exiting_bb2.epil: ; EPILOG-BLOCK-NEXT: br i1 false, label %loop_latch.epil, label %exit3 ; EPILOG-BLOCK: loop_latch.epil: -; EPILOG-BLOCK-NEXT: %iv_next.epil = add i64 %iv.epil, 1 -; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip -; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.epilog-lcssa -; EPILOG-BLOCK: exit2.loopexit.epilog-lcssa: ; EPILOG-BLOCK-NEXT: br label %exit2.loopexit ; EPILOG-BLOCK: exit2.loopexit: ; EPILOG-BLOCK-NEXT: ret void @@ -313,11 +306,9 @@ ; PROLOG-BLOCK: loop_exiting_bb2.prol: ; PROLOG-BLOCK-NEXT: br i1 false, label %loop_latch.prol, label %exit3 ; PROLOG-BLOCK: loop_latch.prol: -; PROLOG-BLOCK-NEXT: %iv_next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %1, label %exit2.loopexit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -568,29 +559,23 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %for.body.epil ; EPILOG-BLOCK: for.body.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n -; EPILOG-BLOCK-NEXT: br label %for.end.epilog-lcssa -; EPILOG-BLOCK: for.end.epilog-lcssa: +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %for.end ; EPILOG-BLOCK: for.end: -; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.end.epilog-lcssa ] +; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %for.end.unr-lcssa ], [ %add.epil, %for.body.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: ; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %for.body ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: -; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] +; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] ; EPILOG-BLOCK-NEXT: ret i32 %retval ; EPILOG-BLOCK: for.exiting_block.1: ; EPILOG-BLOCK-NEXT: %cmp.1 = icmp eq i64 %n, 42 @@ -749,16 +734,12 @@ ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %for.body.prol ; PROLOG-BLOCK: for.body.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0 -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n +; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %for.body.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %for.body.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %for.body.prol ] +; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %1, %for.body.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %for.body.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %for.body.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %for.end, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -1006,31 +987,22 @@ ; EPILOG-BLOCK: exit1: ; EPILOG-BLOCK-NEXT: ret void ; EPILOG-BLOCK: exit2.loopexit.unr-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %iv.unr.ph = phi i64 [ %iv_next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: %sum.unr.ph = phi i64 [ %sum.next.1, %loop_latch.1 ] ; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.unr-lcssa ; EPILOG-BLOCK: exit2.loopexit.unr-lcssa: -; EPILOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.unr.ph, %exit2.loopexit.unr-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 ; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %loop_header.epil.preheader, label %exit2.loopexit ; EPILOG-BLOCK: loop_header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %loop_header.epil ; EPILOG-BLOCK: loop_header.epil: -; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 undef, label %loop_latch.epil, label %loop_exiting_bb1.epil ; EPILOG-BLOCK: loop_exiting_bb1.epil: -; EPILOG-BLOCK-NEXT: switch i64 %sum.epil, label %loop_latch.epil [ +; EPILOG-BLOCK-NEXT: switch i64 %sum.unr, label %loop_latch.epil [ ; EPILOG-BLOCK-NEXT: i64 24, label %exit1 ; EPILOG-BLOCK-NEXT: i64 42, label %exit3 ; EPILOG-BLOCK-NEXT: ] ; EPILOG-BLOCK: loop_latch.epil: -; EPILOG-BLOCK-NEXT: %iv_next.epil = add nuw nsw i64 %iv.epil, 1 -; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.epil, %add -; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip -; EPILOG-BLOCK-NEXT: br label %exit2.loopexit.epilog-lcssa -; EPILOG-BLOCK: exit2.loopexit.epilog-lcssa: ; EPILOG-BLOCK-NEXT: br label %exit2.loopexit ; EPILOG-BLOCK: exit2.loopexit: ; EPILOG-BLOCK-NEXT: ret void @@ -1192,13 +1164,10 @@ ; PROLOG-BLOCK-NEXT: i64 42, label %exit3 ; PROLOG-BLOCK-NEXT: ] ; PROLOG-BLOCK: loop_latch.prol: -; PROLOG-BLOCK-NEXT: %iv_next.prol = add nuw nsw i64 0, 1 -; PROLOG-BLOCK-NEXT: %sum.next.prol = add i64 0, %add -; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %1, label %exit2.loopexit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -1455,18 +1424,14 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %latchExit.epilog-lcssa, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: ; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 0, %header ], [ 0, %latch ] @@ -1641,16 +1606,12 @@ ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0 -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n +; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -1907,18 +1868,14 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: ; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ 2, %for.exiting_block ], [ 2, %for.exiting_block.1 ] @@ -2093,16 +2050,12 @@ ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0 -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n +; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -2360,24 +2313,20 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: ; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa: -; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] +; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: br label %latchExit ; EPILOG-BLOCK: latchExit: ; EPILOG-BLOCK-NEXT: %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ] @@ -2546,16 +2495,12 @@ ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0 -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n +; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -2814,24 +2759,20 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %3 = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.epil +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %3, %sum.02.unr ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %latchExit.epilog-lcssa, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa.loopexit: ; EPILOG-BLOCK-NEXT: %result.ph1.ph = phi i32 [ %sum.02, %for.exiting_block ], [ %add, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %latchExit.epilog-lcssa ; EPILOG-BLOCK: latchExit.epilog-lcssa: -; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.epil, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] +; EPILOG-BLOCK-NEXT: %result.ph1 = phi i32 [ %add.epil, %latch.epil ], [ %sum.02.unr, %for.exiting_block.epil ], [ %result.ph1.ph, %latchExit.epilog-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: br label %latchExit ; EPILOG-BLOCK: latchExit: ; EPILOG-BLOCK-NEXT: %result = phi i32 [ %result.ph, %latchExit.unr-lcssa ], [ %result.ph1, %latchExit.epilog-lcssa ] @@ -2997,19 +2938,15 @@ ; PROLOG-BLOCK: header.prol: ; PROLOG-BLOCK-NEXT: br i1 %cond, label %for.exit2, label %for.exiting_block.prol ; PROLOG-BLOCK: for.exiting_block.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %1, 0 +; PROLOG-BLOCK-NEXT: %1 = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %latchExit.unr-lcssa, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %result.unr = phi i32 [ undef, %entry ], [ %1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchExit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -3324,24 +3261,18 @@ ; EPILOG-BLOCK: loop_header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %loop_header.epil ; EPILOG-BLOCK: loop_header.epil: -; EPILOG-BLOCK-NEXT: %iv.epil = phi i64 [ %iv.unr, %loop_header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.epil = phi i64 [ %sum.unr, %loop_header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.epil, label %loop_exiting.epil ; EPILOG-BLOCK: loop_exiting.epil: -; EPILOG-BLOCK-NEXT: %ivy.epil = add i64 %iv.epil, %add -; EPILOG-BLOCK-NEXT: switch i64 %sum.epil, label %loop_latch.epil [ +; EPILOG-BLOCK-NEXT: %ivy.epil = add i64 %iv.unr, %add +; EPILOG-BLOCK-NEXT: switch i64 %sum.unr, label %loop_latch.epil [ ; EPILOG-BLOCK-NEXT: i64 24, label %exit1 ; EPILOG-BLOCK-NEXT: i64 42, label %exit1 ; EPILOG-BLOCK-NEXT: ] ; EPILOG-BLOCK: loop_latch.epil: -; EPILOG-BLOCK-NEXT: %iv_next.epil = add nuw nsw i64 %iv.epil, 1 -; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.epil, %add -; EPILOG-BLOCK-NEXT: %cmp.epil = icmp ne i64 %iv_next.epil, %trip -; EPILOG-BLOCK-NEXT: br label %latchexit.epilog-lcssa -; EPILOG-BLOCK: latchexit.epilog-lcssa: +; EPILOG-BLOCK-NEXT: %sum.next.epil = add i64 %sum.unr, %add ; EPILOG-BLOCK-NEXT: br label %latchexit ; EPILOG-BLOCK: latchexit: -; EPILOG-BLOCK-NEXT: %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %latchexit.epilog-lcssa ] +; EPILOG-BLOCK-NEXT: %sum.next.lcssa = phi i64 [ %sum.next.lcssa.ph, %latchexit.unr-lcssa ], [ %sum.next.epil, %loop_latch.epil ] ; EPILOG-BLOCK-NEXT: ret i64 %sum.next.lcssa ; EPILOG-BLOCK: loop_exiting.1: ; EPILOG-BLOCK-NEXT: %ivy.1 = add i64 %iv_next, %add @@ -3507,20 +3438,16 @@ ; PROLOG-BLOCK: loop_header.prol: ; PROLOG-BLOCK-NEXT: br i1 %cond, label %loop_latch.prol, label %loop_exiting.prol ; PROLOG-BLOCK: loop_exiting.prol: -; PROLOG-BLOCK-NEXT: %ivy.prol = add i64 0, %add ; PROLOG-BLOCK-NEXT: switch i64 0, label %loop_latch.prol [ ; PROLOG-BLOCK-NEXT: i64 24, label %exit1 ; PROLOG-BLOCK-NEXT: i64 42, label %exit1 ; PROLOG-BLOCK-NEXT: ] ; PROLOG-BLOCK: loop_latch.prol: -; PROLOG-BLOCK-NEXT: %iv_next.prol = add nuw nsw i64 0, 1 -; PROLOG-BLOCK-NEXT: %sum.next.prol = add i64 0, %add -; PROLOG-BLOCK-NEXT: %cmp.prol = icmp ne i64 %iv_next.prol, %trip ; PROLOG-BLOCK-NEXT: br label %loop_header.prol.loopexit ; PROLOG-BLOCK: loop_header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ %iv_next.prol, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %sum.next.prol, %loop_latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %sum.next.prol, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %iv.unr = phi i64 [ 0, %entry ], [ 1, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.unr = phi i64 [ 0, %entry ], [ %add, %loop_latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.next.lcssa.unr = phi i64 [ undef, %entry ], [ %add, %loop_latch.prol ] ; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %1, label %latchexit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -3543,7 +3470,7 @@ ; PROLOG-BLOCK-NEXT: %result.ph = phi i64 [ %ivy, %loop_exiting ], [ %ivy, %loop_exiting ], [ %ivy.1, %loop_exiting.1 ], [ %ivy.1, %loop_exiting.1 ] ; PROLOG-BLOCK-NEXT: br label %exit1 ; PROLOG-BLOCK: exit1: -; PROLOG-BLOCK-NEXT: %result = phi i64 [ %ivy.prol, %loop_exiting.prol ], [ %ivy.prol, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ] +; PROLOG-BLOCK-NEXT: %result = phi i64 [ %add, %loop_exiting.prol ], [ %add, %loop_exiting.prol ], [ %result.ph, %exit1.loopexit ] ; PROLOG-BLOCK-NEXT: ret i64 %result ; PROLOG-BLOCK: latchexit.unr-lcssa: ; PROLOG-BLOCK-NEXT: %sum.next.lcssa.ph = phi i64 [ %sum.next.1, %loop_latch.1 ] @@ -3779,29 +3706,23 @@ ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %indvars.iv.epil = phi i64 [ %indvars.iv.unr, %header.epil.preheader ] -; EPILOG-BLOCK-NEXT: %sum.02.epil = phi i32 [ %sum.02.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 false, label %for.exit2, label %for.exiting_block.epil ; EPILOG-BLOCK: for.exiting_block.epil: ; EPILOG-BLOCK-NEXT: %cmp.epil = icmp eq i64 %n, 42 ; EPILOG-BLOCK-NEXT: br i1 %cmp.epil, label %for.exit2, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.epil +; EPILOG-BLOCK-NEXT: %arrayidx.epil = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.unr ; EPILOG-BLOCK-NEXT: %load.epil = load i32, i32* %arrayidx.epil, align 4 -; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %load.epil, %sum.02.epil -; EPILOG-BLOCK-NEXT: %indvars.iv.next.epil = add i64 %indvars.iv.epil, 1 -; EPILOG-BLOCK-NEXT: %exitcond.epil = icmp eq i64 %indvars.iv.next.epil, %n -; EPILOG-BLOCK-NEXT: br label %latch_exit.epilog-lcssa -; EPILOG-BLOCK: latch_exit.epilog-lcssa: +; EPILOG-BLOCK-NEXT: %add.epil = add nsw i32 %load.epil, %sum.02.unr ; EPILOG-BLOCK-NEXT: br label %latch_exit ; EPILOG-BLOCK: latch_exit: -; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch_exit.epilog-lcssa ] +; EPILOG-BLOCK-NEXT: %sum.0.lcssa = phi i32 [ %sum.0.lcssa.ph, %latch_exit.unr-lcssa ], [ %add.epil, %latch.epil ] ; EPILOG-BLOCK-NEXT: ret i32 %sum.0.lcssa ; EPILOG-BLOCK: for.exit2.loopexit: ; EPILOG-BLOCK-NEXT: %retval.ph = phi i32 [ 42, %for.exiting_block ], [ %sum.02, %header ], [ %add, %latch ], [ 42, %for.exiting_block.1 ] ; EPILOG-BLOCK-NEXT: br label %for.exit2 ; EPILOG-BLOCK: for.exit2: -; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.epil, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] +; EPILOG-BLOCK-NEXT: %retval = phi i32 [ %sum.02.unr, %header.epil ], [ 42, %for.exiting_block.epil ], [ %retval.ph, %for.exit2.loopexit ] ; EPILOG-BLOCK-NEXT: %addx = add i32 %retval, %x ; EPILOG-BLOCK-NEXT: br i1 %cond, label %exit_true, label %exit_false ; EPILOG-BLOCK: exit_true: @@ -3970,16 +3891,12 @@ ; PROLOG-BLOCK-NEXT: %cmp.prol = icmp eq i64 %n, 42 ; PROLOG-BLOCK-NEXT: br i1 %cmp.prol, label %for.exit2, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %arrayidx.prol = getelementptr inbounds i32, i32* %a, i64 0 -; PROLOG-BLOCK-NEXT: %load.prol = load i32, i32* %arrayidx.prol, align 4 -; PROLOG-BLOCK-NEXT: %add.prol = add nsw i32 %load.prol, 0 -; PROLOG-BLOCK-NEXT: %indvars.iv.next.prol = add i64 0, 1 -; PROLOG-BLOCK-NEXT: %exitcond.prol = icmp eq i64 %indvars.iv.next.prol, %n +; PROLOG-BLOCK-NEXT: %load.prol = load i32, i32* %a, align 4 ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %add.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ %indvars.iv.next.prol, %latch.prol ] -; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.0.lcssa.unr = phi i32 [ undef, %entry ], [ %load.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %indvars.iv.unr = phi i64 [ 0, %entry ], [ 1, %latch.prol ] +; PROLOG-BLOCK-NEXT: %sum.02.unr = phi i32 [ 0, %entry ], [ %load.prol, %latch.prol ] ; PROLOG-BLOCK-NEXT: %1 = icmp ult i64 %0, 1 ; PROLOG-BLOCK-NEXT: br i1 %1, label %latch_exit, label %entry.new ; PROLOG-BLOCK: entry.new: @@ -4175,22 +4092,15 @@ ; EPILOG-BLOCK-NEXT: %niter.nsub = sub i64 %niter, 1 ; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1.loopexit, label %latch.1 ; EPILOG-BLOCK: latchexit.unr-lcssa.loopexit: -; EPILOG-BLOCK-NEXT: %i6.unr.ph = phi i64 [ %add.1, %latch.1 ] ; EPILOG-BLOCK-NEXT: br label %latchexit.unr-lcssa ; EPILOG-BLOCK: latchexit.unr-lcssa: -; EPILOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ %i6.unr.ph, %latchexit.unr-lcssa.loopexit ] ; EPILOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 ; EPILOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.epil.preheader, label %latchexit ; EPILOG-BLOCK: header.epil.preheader: ; EPILOG-BLOCK-NEXT: br label %header.epil ; EPILOG-BLOCK: header.epil: -; EPILOG-BLOCK-NEXT: %i6.epil = phi i64 [ %i6.unr, %header.epil.preheader ] ; EPILOG-BLOCK-NEXT: br i1 false, label %loopexit1, label %latch.epil ; EPILOG-BLOCK: latch.epil: -; EPILOG-BLOCK-NEXT: %add.epil = add nuw nsw i64 %i6.epil, 1 -; EPILOG-BLOCK-NEXT: %i9.epil = icmp slt i64 %add.epil, %sext -; EPILOG-BLOCK-NEXT: br label %latchexit.epilog-lcssa -; EPILOG-BLOCK: latchexit.epilog-lcssa: ; EPILOG-BLOCK-NEXT: br label %latchexit ; EPILOG-BLOCK: latchexit: ; EPILOG-BLOCK-NEXT: unreachable @@ -4302,11 +4212,9 @@ ; PROLOG-BLOCK: header.prol: ; PROLOG-BLOCK-NEXT: br i1 false, label %loopexit1, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: %add.prol = add nuw nsw i64 1, 1 -; PROLOG-BLOCK-NEXT: %i9.prol = icmp slt i64 %add.prol, %sext ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: -; PROLOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ %add.prol, %latch.prol ] +; PROLOG-BLOCK-NEXT: %i6.unr = phi i64 [ 1, %preheader ], [ 2, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %latchexit, label %preheader.new ; PROLOG-BLOCK: preheader.new: @@ -4512,31 +4420,29 @@ ; PROLOG-BLOCK: outerloop.loopexit.loopexit: ; PROLOG-BLOCK-NEXT: br label %outerloop.loopexit ; PROLOG-BLOCK: outerloop.loopexit: -; PROLOG-BLOCK-NEXT: br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.unr-lcssa.1 +; PROLOG-BLOCK-NEXT: br i1 false, label %innerH.prol.preheader.1, label %innerH.prol.loopexit.1 ; PROLOG-BLOCK: outerloop: ; PROLOG-BLOCK-NEXT: %i = phi i64 [ 3, %bb ], [ 0, %outerloop.loopexit.1 ] ; PROLOG-BLOCK-NEXT: %0 = sub i64 100, %i ; PROLOG-BLOCK-NEXT: %1 = sub i64 99, %i ; PROLOG-BLOCK-NEXT: %xtraiter = and i64 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i64 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit.unr-lcssa +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %innerH.prol.preheader, label %innerH.prol.loopexit ; PROLOG-BLOCK: innerH.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %innerH.prol ; PROLOG-BLOCK: innerH.prol: ; PROLOG-BLOCK-NEXT: %i4.prol = add nuw nsw i64 %i, 1 ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit, label %latch.prol ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.unr-lcssa -; PROLOG-BLOCK: innerH.prol.loopexit.unr-lcssa: -; PROLOG-BLOCK-NEXT: %i3.unr.ph = phi i64 [ %i4.prol, %latch.prol ], [ %i, %outerloop ] ; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit ; PROLOG-BLOCK: innerH.prol.loopexit: +; PROLOG-BLOCK-NEXT: %i3.unr = phi i64 [ %i, %outerloop ], [ %i4.prol, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i64 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %exit.loopexit, label %outerloop.new ; PROLOG-BLOCK: outerloop.new: ; PROLOG-BLOCK-NEXT: br label %innerH ; PROLOG-BLOCK: innerH: -; PROLOG-BLOCK-NEXT: %i3 = phi i64 [ %i3.unr.ph, %outerloop.new ], [ %i4.1, %latch.1 ] +; PROLOG-BLOCK-NEXT: %i3 = phi i64 [ %i3.unr, %outerloop.new ], [ %i4.1, %latch.1 ] ; PROLOG-BLOCK-NEXT: %i4 = add nuw nsw i64 %i3, 1 ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit, label %latch ; PROLOG-BLOCK: latch: @@ -4560,16 +4466,14 @@ ; PROLOG-BLOCK: innerH.prol.1: ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.1, label %latch.prol.1 ; PROLOG-BLOCK: latch.prol.1: -; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.unr-lcssa.1 -; PROLOG-BLOCK: innerH.prol.loopexit.unr-lcssa.1: -; PROLOG-BLOCK-NEXT: %i3.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %outerloop.loopexit ] ; PROLOG-BLOCK-NEXT: br label %innerH.prol.loopexit.1 ; PROLOG-BLOCK: innerH.prol.loopexit.1: +; PROLOG-BLOCK-NEXT: %i3.unr.1 = phi i64 [ 0, %outerloop.loopexit ], [ 1, %latch.prol.1 ] ; PROLOG-BLOCK-NEXT: br i1 false, label %exit.loopexit, label %outerloop.new.1 ; PROLOG-BLOCK: outerloop.new.1: ; PROLOG-BLOCK-NEXT: br label %innerH.1 ; PROLOG-BLOCK: innerH.1: -; PROLOG-BLOCK-NEXT: %i3.1 = phi i64 [ %i3.unr.ph.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ] +; PROLOG-BLOCK-NEXT: %i3.1 = phi i64 [ %i3.unr.1, %outerloop.new.1 ], [ %i4.1.1, %latch.1.1 ] ; PROLOG-BLOCK-NEXT: %i4.11 = add nuw nsw i64 %i3.1, 1 ; PROLOG-BLOCK-NEXT: br i1 false, label %outerloop.loopexit.loopexit.1, label %latch.12 ; PROLOG-BLOCK: latch.12: @@ -4799,23 +4703,21 @@ ; PROLOG-BLOCK: preheader: ; PROLOG-BLOCK-NEXT: %xtraiter = and i32 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0 -; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit.unr-lcssa +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod, label %header.prol.preheader, label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.preheader: ; PROLOG-BLOCK-NEXT: br label %header.prol ; PROLOG-BLOCK: header.prol: ; PROLOG-BLOCK-NEXT: br i1 true, label %latch.prol, label %innerexit.loopexit1 ; PROLOG-BLOCK: latch.prol: -; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.unr-lcssa -; PROLOG-BLOCK: header.prol.loopexit.unr-lcssa: -; PROLOG-BLOCK-NEXT: %phi.unr.ph = phi i64 [ 1, %latch.prol ], [ 0, %preheader ] ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit ; PROLOG-BLOCK: header.prol.loopexit: +; PROLOG-BLOCK-NEXT: %phi.unr = phi i64 [ 0, %preheader ], [ 1, %latch.prol ] ; PROLOG-BLOCK-NEXT: %2 = icmp ult i32 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %2, label %outerLatch.loopexit, label %preheader.new ; PROLOG-BLOCK: preheader.new: ; PROLOG-BLOCK-NEXT: br label %header ; PROLOG-BLOCK: header: -; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr.ph, %preheader.new ], [ %iv.next.1, %latch.1 ] +; PROLOG-BLOCK-NEXT: %phi = phi i64 [ %phi.unr, %preheader.new ], [ %iv.next.1, %latch.1 ] ; PROLOG-BLOCK-NEXT: br i1 true, label %latch, label %innerexit.loopexit.loopexit ; PROLOG-BLOCK: innerexit.loopexit.loopexit: ; PROLOG-BLOCK-NEXT: %trip.lcssa.ph.ph = phi i32 [ %trip, %latch ], [ %trip, %header ] @@ -4853,23 +4755,21 @@ ; PROLOG-BLOCK: preheader.1: ; PROLOG-BLOCK-NEXT: %xtraiter.1 = and i32 %0, 1 ; PROLOG-BLOCK-NEXT: %lcmp.mod.1 = icmp ne i32 %xtraiter.1, 0 -; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.unr-lcssa.1 +; PROLOG-BLOCK-NEXT: br i1 %lcmp.mod.1, label %header.prol.preheader.1, label %header.prol.loopexit.1 ; PROLOG-BLOCK: header.prol.preheader.1: ; PROLOG-BLOCK-NEXT: br label %header.prol.1 ; PROLOG-BLOCK: header.prol.1: ; PROLOG-BLOCK-NEXT: br i1 true, label %latch.prol.1, label %innerexit.loopexit1 ; PROLOG-BLOCK: latch.prol.1: -; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.unr-lcssa.1 -; PROLOG-BLOCK: header.prol.loopexit.unr-lcssa.1: -; PROLOG-BLOCK-NEXT: %phi.unr.ph.1 = phi i64 [ 1, %latch.prol.1 ], [ 0, %preheader.1 ] ; PROLOG-BLOCK-NEXT: br label %header.prol.loopexit.1 ; PROLOG-BLOCK: header.prol.loopexit.1: +; PROLOG-BLOCK-NEXT: %phi.unr.1 = phi i64 [ 0, %preheader.1 ], [ 1, %latch.prol.1 ] ; PROLOG-BLOCK-NEXT: %3 = icmp ult i32 %1, 1 ; PROLOG-BLOCK-NEXT: br i1 %3, label %outerLatch.loopexit.1, label %preheader.new.1 ; PROLOG-BLOCK: preheader.new.1: ; PROLOG-BLOCK-NEXT: br label %header.1 ; PROLOG-BLOCK: header.1: -; PROLOG-BLOCK-NEXT: %phi.1 = phi i64 [ %phi.unr.ph.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ] +; PROLOG-BLOCK-NEXT: %phi.1 = phi i64 [ %phi.unr.1, %preheader.new.1 ], [ %iv.next.1.1, %latch.1.1 ] ; PROLOG-BLOCK-NEXT: br i1 true, label %latch.14, label %innerexit.loopexit.loopexit5 ; PROLOG-BLOCK: latch.14: ; PROLOG-BLOCK-NEXT: %iv.next.13 = add nuw nsw i64 %phi.1, 1 Index: llvm/test/Transforms/LoopUnroll/runtime-loop1.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-loop1.ll +++ llvm/test/Transforms/LoopUnroll/runtime-loop1.ll @@ -15,9 +15,9 @@ ; EPILOG: for.body.epil.preheader: ; EPILOG: br label %for.body.epil, !dbg [[PH_LOC]] ; EPILOG: for.body.epil: -; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[PH_LOC]] +; EPILOG: br label %for.end.loopexit, !dbg [[EXIT_LOC:![0-9]+]] ; EPILOG: for.end.loopexit: -; EPILOG: br label %for.end, !dbg [[EXIT_LOC:![0-9]+]] +; EPILOG: br label %for.end, !dbg [[EXIT_LOC]] ; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}}) ; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}}) Index: llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll =================================================================== --- llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll +++ llvm/test/Transforms/LoopUnroll/runtime-unroll-assume-no-remainder.ll @@ -35,7 +35,7 @@ ; CHECK-NEXT: store i8 [[ADD_1]], i8* [[ARRAYIDX4_1]], align 1 ; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[INC]], 1 ; CHECK-NEXT: [[CMP1_1:%.*]] = icmp slt i32 [[INC_1]], [[N]] -; CHECK-NEXT: br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], [[LOOP0:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[CMP1_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT:%.*]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -113,7 +113,7 @@ ; CHECK-NEXT: [[INC_1]] = add nuw nsw i32 [[INC]], 1 ; CHECK-NEXT: [[NITER_NSUB_1]] = sub i32 [[NITER_NSUB]], 1 ; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp ne i32 [[NITER_NSUB_1]], 0 -; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], [[LOOP2:!llvm.loop !.*]] +; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label [[FOR_BODY]], label [[EXIT_LOOPEXIT_UNR_LCSSA_LOOPEXIT:%.*]], !llvm.loop [[LOOP2:![0-9]+]] ; CHECK: exit.loopexit.unr-lcssa.loopexit: ; CHECK-NEXT: [[I_011_UNR_PH:%.*]] = phi i32 [ [[INC_1]], [[FOR_BODY]] ] ; CHECK-NEXT: br label [[EXIT_LOOPEXIT_UNR_LCSSA]] @@ -124,16 +124,11 @@ ; CHECK: for.body.epil.preheader: ; CHECK-NEXT: br label [[FOR_BODY_EPIL:%.*]] ; CHECK: for.body.epil: -; CHECK-NEXT: [[I_011_EPIL:%.*]] = phi i32 [ [[I_011_UNR]], [[FOR_BODY_EPIL_PREHEADER]] ] -; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[I_011_EPIL]] +; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i8, i8* [[B]], i32 [[I_011_UNR]] ; CHECK-NEXT: [[TMP4:%.*]] = load i8, i8* [[ARRAYIDX_EPIL]], align 1 ; CHECK-NEXT: [[ADD_EPIL:%.*]] = add i8 [[TMP4]], 3 -; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[I_011_EPIL]] +; CHECK-NEXT: [[ARRAYIDX4_EPIL:%.*]] = getelementptr inbounds i8, i8* [[A]], i32 [[I_011_UNR]] ; CHECK-NEXT: store i8 [[ADD_EPIL]], i8* [[ARRAYIDX4_EPIL]], align 1 -; CHECK-NEXT: [[INC_EPIL:%.*]] = add nuw nsw i32 [[I_011_EPIL]], 1 -; CHECK-NEXT: [[CMP1_EPIL:%.*]] = icmp slt i32 [[INC_EPIL]], [[N]] -; CHECK-NEXT: br label [[EXIT_LOOPEXIT_EPILOG_LCSSA:%.*]] -; CHECK: exit.loopexit.epilog-lcssa: ; CHECK-NEXT: br label [[EXIT_LOOPEXIT]] ; CHECK: exit.loopexit: ; CHECK-NEXT: br label [[EXIT]]