Index: llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp +++ llvm/trunk/lib/Transforms/Scalar/LoopInterchange.cpp @@ -410,7 +410,6 @@ void removeChildLoop(Loop *OuterLoop, Loop *InnerLoop); private: - void splitInnerLoopLatch(Instruction *); void splitInnerLoopHeader(); bool adjustLoopLinks(); void adjustLoopPreheaders(); @@ -1226,7 +1225,7 @@ if (InnerLoop->getSubLoops().empty()) { BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader(); - LLVM_DEBUG(dbgs() << "Calling Split Inner Loop\n"); + LLVM_DEBUG(dbgs() << "Splitting the inner loop latch\n"); PHINode *InductionPHI = getInductionVariable(InnerLoop, SE); if (!InductionPHI) { LLVM_DEBUG(dbgs() << "Failed to find the point to split loop latch \n"); @@ -1242,11 +1241,55 @@ if (&InductionPHI->getParent()->front() != InductionPHI) InductionPHI->moveBefore(&InductionPHI->getParent()->front()); - // Split at the place were the induction variable is - // incremented/decremented. - // TODO: This splitting logic may not work always. Fix this. - splitInnerLoopLatch(InnerIndexVar); - LLVM_DEBUG(dbgs() << "splitInnerLoopLatch done\n"); + // Create a new latch block for the inner loop. We split at the + // current latch's terminator and then move the condition and all + // operands that are not either loop-invariant or the induction PHI into the + // new latch block. + BasicBlock *NewLatch = + SplitBlock(InnerLoop->getLoopLatch(), + InnerLoop->getLoopLatch()->getTerminator(), DT, LI); + + SmallSetVector<Instruction *, 4> WorkList; + unsigned i = 0; + auto MoveInstructions = [&i, &WorkList, this, InductionPHI, NewLatch]() { + for (; i < WorkList.size(); i++) { + // Duplicate instruction and move it to the new latch. Update uses that + // have been moved. 
+ Instruction *NewI = WorkList[i]->clone(); + NewI->insertBefore(NewLatch->getFirstNonPHI()); + assert(!NewI->mayHaveSideEffects() && + "Moving instructions with side-effects may change behavior of " + "the loop nest!"); + for (auto UI = WorkList[i]->use_begin(), UE = WorkList[i]->use_end(); + UI != UE;) { + Use &U = *UI++; + Instruction *UserI = cast<Instruction>(U.getUser()); + if (!InnerLoop->contains(UserI->getParent()) || + UserI->getParent() == NewLatch || UserI == InductionPHI) + U.set(NewI); + } + // Add operands of moved instruction to the worklist, except if they are + // outside the inner loop or are the induction PHI. + for (Value *Op : WorkList[i]->operands()) { + Instruction *OpI = dyn_cast<Instruction>(Op); + if (!OpI || + this->LI->getLoopFor(OpI->getParent()) != this->InnerLoop || + OpI == InductionPHI) + continue; + WorkList.insert(OpI); + } + } + }; + + // FIXME: Should we interchange when we have a constant condition? + Instruction *CondI = dyn_cast<Instruction>( + cast<BranchInst>(InnerLoop->getLoopLatch()->getTerminator()) + ->getCondition()); + if (CondI) + WorkList.insert(CondI); + MoveInstructions(); + WorkList.insert(cast<Instruction>(InnerIndexVar)); + MoveInstructions(); // Splits the inner loops phi nodes out into a separate basic block. 
BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); @@ -1263,10 +1306,6 @@ return true; } -void LoopInterchangeTransform::splitInnerLoopLatch(Instruction *Inc) { - SplitBlock(InnerLoop->getLoopLatch(), Inc, DT, LI); -} - /// \brief Move all instructions except the terminator from FromBB right before /// InsertBefore static void moveBBContents(BasicBlock *FromBB, Instruction *InsertBefore) { Index: llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll +++ llvm/trunk/test/Transforms/LoopInterchange/interchangeable.ll @@ -18,26 +18,28 @@ ; CHECK: for1.header.preheader: ; CHECK-NEXT: br label [[FOR1_HEADER:%.*]] ; CHECK: for1.header: -; CHECK-NEXT: [[INDVARS_IV23:%.*]] = phi i64 [ [[INDVARS_IV_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: [[J23:%.*]] = phi i64 [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[FOR2_SPLIT1:%.*]] ; CHECK: for2.preheader: ; CHECK-NEXT: br label [[FOR2:%.*]] ; CHECK: for2: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP0:%.*]], [[FOR2_SPLIT:%.*]] ], [ 0, [[FOR2_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]] ; CHECK: for2.split1: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]] ; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX5]] ; CHECK-NEXT: [[ADD:%.*]] = add nsw i64 [[LV]], [[K:%.*]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX5]] +; CHECK-NEXT: [[J_NEXT:%.*]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 
[[J]], 99 ; CHECK-NEXT: br label [[FOR1_INC10]] ; CHECK: for2.split: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 99 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END12:%.*]], label [[FOR2]] +; CHECK-NEXT: [[TMP0]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 [[J]], 99 +; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_END12:%.*]], label [[FOR2]] ; CHECK: for1.inc10: -; CHECK-NEXT: [[INDVARS_IV_NEXT24]] = add nuw nsw i64 [[INDVARS_IV23]], 1 -; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[INDVARS_IV23]], 99 +; CHECK-NEXT: [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1 +; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 99 ; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR2_SPLIT]], label [[FOR1_HEADER]] ; CHECK: for.end12: ; CHECK-NEXT: ret void @@ -79,26 +81,28 @@ ; CHECK: for1.header.preheader: ; CHECK-NEXT: br label [[FOR1_HEADER:%.*]] ; CHECK: for1.header: -; CHECK-NEXT: [[INDVARS_IV19:%.*]] = phi i64 [ [[INDVARS_IV_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: [[J19:%.*]] = phi i64 [ [[J_NEXT20:%.*]], [[FOR1_INC10:%.*]] ], [ 0, [[FOR1_HEADER_PREHEADER:%.*]] ] ; CHECK-NEXT: br label [[FOR3_SPLIT1:%.*]] ; CHECK: for3.preheader: ; CHECK-NEXT: br label [[FOR3:%.*]] ; CHECK: for3: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ] +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 100, [[FOR3_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]] ; CHECK: for3.split1: -; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV19]] +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J19]] ; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX5]] ; CHECK-NEXT: 
[[ADD:%.*]] = add nsw i64 [[TMP0]], [[K:%.*]] ; CHECK-NEXT: store i64 [[ADD]], i64* [[ARRAYIDX5]] +; CHECK-NEXT: [[J_NEXT:%.*]] = add nsw i64 [[J]], -1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[J]], 0 ; CHECK-NEXT: br label [[FOR1_INC10]] ; CHECK: for3.split: -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp sgt i64 [[INDVARS_IV]], 0 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR3]], label [[FOR_END11:%.*]] +; CHECK-NEXT: [[TMP1]] = add nsw i64 [[J]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i64 [[J]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[FOR3]], label [[FOR_END11:%.*]] ; CHECK: for1.inc10: -; CHECK-NEXT: [[INDVARS_IV_NEXT20]] = add nuw nsw i64 [[INDVARS_IV19]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT20]], 100 +; CHECK-NEXT: [[J_NEXT20]] = add nuw nsw i64 [[J19]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[J_NEXT20]], 100 ; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR3_SPLIT]], label [[FOR1_HEADER]] ; CHECK: for.end11: ; CHECK-NEXT: ret void @@ -139,6 +143,28 @@ ;; FIXME: DA misses this case after D35430 define void @interchange_10() { +; CHECK-LABEL: @interchange_10( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR1_HEADER:%.*]] +; CHECK: for1.header: +; CHECK-NEXT: [[J23:%.*]] = phi i64 [ 1, [[ENTRY:%.*]] ], [ [[J_NEXT24:%.*]], [[FOR1_INC10:%.*]] ] +; CHECK-NEXT: [[J_NEXT24]] = add nuw nsw i64 [[J23]], 1 +; CHECK-NEXT: br label [[FOR2:%.*]] +; CHECK: for2: +; CHECK-NEXT: [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR2]] ], [ 1, [[FOR1_HEADER]] ] +; CHECK-NEXT: [[J_NEXT]] = add nuw nsw i64 [[J]], 1 +; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J23]] +; CHECK-NEXT: store i64 [[J]], i64* [[ARRAYIDX5]] +; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* @A, i64 0, i64 [[J]], i64 [[J_NEXT24]] +; CHECK-NEXT: store i64 [[J23]], i64* [[ARRAYIDX10]] +; 
CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[J]], 99 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_INC10]], label [[FOR2]] +; CHECK: for1.inc10: +; CHECK-NEXT: [[EXITCOND26:%.*]] = icmp eq i64 [[J23]], 98 +; CHECK-NEXT: br i1 [[EXITCOND26]], label [[FOR_END12:%.*]], label [[FOR1_HEADER]] +; CHECK: for.end12: +; CHECK-NEXT: ret void +; entry: br label %for1.header Index: llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll +++ llvm/trunk/test/Transforms/LoopInterchange/perserve-lcssa.ll @@ -10,13 +10,18 @@ define void @test_lcssa_indvars1() { ; CHECK-LABEL: @test_lcssa_indvars1() +; CHECK-LABEL: inner.body: +; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ] + ; CHECK-LABEL: inner.body.split: ; CHECK-NEXT: %0 = phi i64 [ %iv.outer.next, %outer.latch ] -; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0 +; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body ; CHECK-LABEL: exit: ; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ] -; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ] +; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ] ; CHECK-NEXT: store i64 %v8.lcssa.lcssa, i64* @b, align 4 ; CHECK-NEXT: store i64 %v4.lcssa, i64* @a, align 4 @@ -52,9 +57,14 @@ define void @test_lcssa_indvars2() { ; CHECK-LABEL: @test_lcssa_indvars2() +; CHECK-LABEL: inner.body: +; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ] + ; CHECK-LABEL: inner.body.split: ; CHECK-NEXT: %0 = phi i64 [ %iv.outer, %outer.latch ] -; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: 
%[[COND:[0-9]+]] = icmp eq i64 %[[IVNEXT]], 0 +; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body ; CHECK-LABEL: exit: ; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ] @@ -93,14 +103,19 @@ define void @test_lcssa_indvars3() { ; CHECK-LABEL: @test_lcssa_indvars3() +; CHECK-LABEL: inner.body: +; CHECK-NEXT: %iv.inner = phi i64 [ %[[IVNEXT:[0-9]+]], %inner.body.split ], [ 5, %inner.body.preheader ] + ; CHECK-LABEL: inner.body.split: ; CHECK-NEXT: %0 = phi i64 [ %iv.outer.next, %outer.latch ] -; CHECK-NEXT: %iv.inner.next = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: %[[IVNEXT]] = add nsw i64 %iv.inner, -1 +; CHECK-NEXT: %[[COND:[0-9]+]] = icmp eq i64 %iv.inner, 0 +; CHECK-NEXT: br i1 %[[COND]], label %exit, label %inner.body ; CHECK-LABEL: exit: ; CHECK-NEXT: %v4.lcssa = phi i64 [ %0, %inner.body.split ] -; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %iv.inner.next, %inner.body.split ] -; CHECK-NEXT: %v8.lcssa.lcssa.2 = phi i64 [ %iv.inner.next, %inner.body.split ] +; CHECK-NEXT: %v8.lcssa.lcssa = phi i64 [ %[[IVNEXT]], %inner.body.split ] +; CHECK-NEXT: %v8.lcssa.lcssa.2 = phi i64 [ %[[IVNEXT]], %inner.body.split ] ; CHECK-NEXT: %r1 = add i64 %v8.lcssa.lcssa, %v8.lcssa.lcssa.2 ; CHECK-NEXT: store i64 %r1, i64* @b, align 4 ; CHECK-NEXT: store i64 %v4.lcssa, i64* @a, align 4 @@ -150,8 +165,12 @@ ; CHECK-LABEL: inner.ph: ; CHECK-NEXT: br label %inner.body ; CHECK-LABEL: inner.body: -; CHECK-NEXT: %tmp31 = phi i32 [ 0, %inner.ph ], [ %tmp6, %inner.body.split ] +; CHECK-NEXT: %tmp31 = phi i32 [ 0, %inner.ph ], [ %[[IVNEXT:[0-9]]], %inner.body.split ] ; CHECK-NEXT: br label %outer.ph +; CHECK-LABEL: inner.body.split: +; CHECK-NEXT: %[[IVNEXT]] = add nsw i32 %tmp31, 1 +; CHECK-NEXT: br i1 false, label %inner.body, label %exit + bb: br label %outer.ph Index: llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll +++ 
llvm/trunk/test/Transforms/LoopInterchange/phi-ordering.ll @@ -27,7 +27,7 @@ ; CHECK: for3.preheader: ; CHECK-NEXT: br label [[FOR3:%.*]] ; CHECK: for3: -; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[INC:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ] +; CHECK-NEXT: [[K:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR3_SPLIT:%.*]] ], [ 1, [[FOR3_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR1_HEADER_PREHEADER]] ; CHECK: for3.split1: ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[K]], [[MUL]] @@ -35,11 +35,13 @@ ; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 ; CHECK-NEXT: [[ADD15:%.*]] = add nsw i16 [[TMP0]], 1 ; CHECK-NEXT: store i16 [[ADD15]], i16* [[ARRAYIDX]] +; CHECK-NEXT: [[INC:%.*]] = add nuw nsw i32 [[K]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90 ; CHECK-NEXT: br label [[FOR2_INC16]] ; CHECK: for3.split: -; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[K]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 90 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]] +; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[K]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 90 +; CHECK-NEXT: br i1 [[TMP2]], label [[FOR1_LOOPEXIT:%.*]], label [[FOR3]] ; CHECK: for2.inc16: ; CHECK-NEXT: [[INC17]] = add nuw nsw i32 [[J]], 1 ; CHECK-NEXT: [[EXITCOND47:%.*]] = icmp eq i32 [[INC17]], 90 Index: llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll +++ llvm/trunk/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll @@ -0,0 +1,140 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-interchange -verify-loop-lcssa -verify-dom-info -S %s | FileCheck %s + +@b = external dso_local global [5 x i32], align 16 + +define void @test1() { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]] 
+; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]] +; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]] +; CHECK: for.body2.preheader: +; CHECK-NEXT: br label [[FOR_BODY2:%.*]] +; CHECK: for.body2: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]] +; CHECK: for.body2.split: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: store i32 undef, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4 +; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1 +; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.inc.split: +; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4 +; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label [[FOR_COND_FOR_END5_CRIT_EDGE:%.*]] +; CHECK: for.cond1.for.end_crit_edge: +; CHECK-NEXT: br label [[FOR_INC3]] +; CHECK: for.inc3: +; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1 +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]] +; CHECK: for.cond.for.end5_crit_edge: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.inc3, %entry + %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ] + br label %for.body2 + +for.body2: ; preds = %for.inc, %for.body + %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ] + br label %for.inc + +for.inc: ; preds = %for.body2 + %idxprom = sext i32 %inc41 to i64 + %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 
0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + store i32 undef, i32* %arrayidx, align 4 + %cmp = icmp slt i32 %lsr.iv, 4 + %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1 + br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge + +for.cond1.for.end_crit_edge: ; preds = %for.inc + br label %for.inc3 + +for.inc3: ; preds = %for.cond1.for.end_crit_edge + %inc4 = add nsw i32 %inc41, 1 + br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge + +for.cond.for.end5_crit_edge: ; preds = %for.inc3 + ret void +} + +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY2_PREHEADER:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ] +; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]] +; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]] +; CHECK: for.body2.preheader: +; CHECK-NEXT: br label [[FOR_BODY2:%.*]] +; CHECK: for.body2: +; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ] +; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]] +; CHECK: for.body2.split: +; CHECK-NEXT: br label [[FOR_INC:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4 +; CHECK-NEXT: [[CMP_ZEXT:%.*]] = zext i1 [[CMP]] to i32 +; CHECK-NEXT: store i32 [[CMP_ZEXT]], i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[LSR_IV_NEXT:%.*]] = add nuw nsw i32 [[LSR_IV]], 1 +; CHECK-NEXT: br label [[FOR_COND1_FOR_END_CRIT_EDGE:%.*]] +; CHECK: for.inc.split: +; CHECK-NEXT: [[TMP1]] = add nuw nsw i32 [[LSR_IV]], 1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[LSR_IV]], 4 +; CHECK-NEXT: br i1 [[TMP2]], label [[FOR_BODY2]], label 
[[FOR_COND_FOR_END5_CRIT_EDGE:%.*]] +; CHECK: for.cond1.for.end_crit_edge: +; CHECK-NEXT: br label [[FOR_INC3]] +; CHECK: for.inc3: +; CHECK-NEXT: [[INC4]] = add nsw i32 [[INC41]], 1 +; CHECK-NEXT: br i1 false, label [[FOR_BODY]], label [[FOR_INC_SPLIT]] +; CHECK: for.cond.for.end5_crit_edge: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: ; preds = %for.inc3, %entry + %inc41 = phi i32 [ %inc4, %for.inc3 ], [ undef, %entry ] + br label %for.body2 + +for.body2: ; preds = %for.inc, %for.body + %lsr.iv = phi i32 [ %lsr.iv.next, %for.inc ], [ 1, %for.body ] + br label %for.inc + +for.inc: ; preds = %for.body2 + %idxprom = sext i32 %inc41 to i64 + %arrayidx = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp slt i32 %lsr.iv, 4 + %cmp.zext = zext i1 %cmp to i32 + store i32 %cmp.zext, i32* %arrayidx, align 4 + %lsr.iv.next = add nuw nsw i32 %lsr.iv, 1 + br i1 %cmp, label %for.body2, label %for.cond1.for.end_crit_edge + +for.cond1.for.end_crit_edge: ; preds = %for.inc + br label %for.inc3 + +for.inc3: ; preds = %for.cond1.for.end_crit_edge + %inc4 = add nsw i32 %inc41, 1 + br i1 undef, label %for.body, label %for.cond.for.end5_crit_edge + +for.cond.for.end5_crit_edge: ; preds = %for.inc3 + ret void +} Index: llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll =================================================================== --- llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll +++ llvm/trunk/test/Transforms/LoopInterchange/reductions-across-inner-and-outer-loop.ll @@ -31,6 +31,8 @@ ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x [100 x i64]], [100 x [100 x i64]]* [[ARR:%.*]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV23]] ; CHECK-NEXT: [[LV:%.*]] = load i64, i64* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[SUM_INC]] = add i64 [[SUM_INNER]], [[LV]] +; CHECK-NEXT: [[IV_ORIGINAL:%.*]] = add nuw nsw i64 
[[INDVARS_IV]], 1 +; CHECK-NEXT: [[EXIT1_ORIGINAL:%.*]] = icmp eq i64 [[IV_ORIGINAL]], 100 ; CHECK-NEXT: br label [[FOR1_INC]] ; CHECK: for2.split: ; CHECK-NEXT: [[SUM_INC_LCSSA]] = phi i64 [ [[SUM_INC]], %for1.inc ]