diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -573,11 +573,7 @@ LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); LoopsInterchanged++; - assert(InnerLoop->isLCSSAForm(*DT) && - "Inner loop not left in LCSSA form after loop interchange!"); - assert(OuterLoop->isLCSSAForm(*DT) && - "Outer loop not left in LCSSA form after loop interchange!"); - + llvm::formLCSSARecursively(*OuterLoop, *DT, LI, SE); return true; } }; @@ -1354,9 +1350,11 @@ for (Instruction *InnerIndexVar : InnerIndexVarList) WorkList.insert(cast(InnerIndexVar)); MoveInstructions(); + } - // Splits the inner loops phi nodes out into a separate basic block. - BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + // Ensure the inner loop phi nodes have a separate basic block. + BasicBlock *InnerLoopHeader = InnerLoop->getHeader(); + if (InnerLoopHeader->getFirstNonPHI() != InnerLoopHeader->getTerminator()) { SplitBlock(InnerLoopHeader, InnerLoopHeader->getFirstNonPHI(), DT, LI); LLVM_DEBUG(dbgs() << "splitting InnerLoopHeader done\n"); } diff --git a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll --- a/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll +++ b/llvm/test/Transforms/LoopInterchange/pr43176-move-to-new-latch.ll @@ -13,14 +13,12 @@ ; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.body2.preheader: ; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ 
[[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]] -; CHECK: for.body2.split: -; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: store i32 undef, i32* [[ARRAYIDX]], align 4 @@ -80,14 +78,12 @@ ; CHECK-NEXT: [[INC41:%.*]] = phi i32 [ [[INC4:%.*]], [[FOR_INC3:%.*]] ], [ undef, [[FOR_BODY_PREHEADER:%.*]] ] ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[INC41]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [5 x i32], [5 x i32]* @b, i64 0, i64 [[IDXPROM]] -; CHECK-NEXT: br label [[FOR_BODY2_SPLIT:%.*]] +; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.body2.preheader: ; CHECK-NEXT: br label [[FOR_BODY2:%.*]] ; CHECK: for.body2: ; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[TMP1:%.*]], [[FOR_INC_SPLIT:%.*]] ], [ 1, [[FOR_BODY2_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY_PREHEADER]] -; CHECK: for.body2.split: -; CHECK-NEXT: br label [[FOR_INC:%.*]] ; CHECK: for.inc: ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[LSR_IV]], 4 diff --git a/llvm/test/Transforms/LoopInterchange/pr57148.ll b/llvm/test/Transforms/LoopInterchange/pr57148.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/pr57148.ll @@ -0,0 +1,168 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=loop-interchange -cache-line-size=4 -verify-dom-info -verify-loop-info -verify-scev -verify-loop-lcssa -S | FileCheck %s + +; Make sure the loops are in LCSSA form after loop interchange, +; and loop interchange does not hit assertion errors and crash. 
+
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = external global [512 x [4 x i32]], align 1
+@c = external global [2 x [4 x i32]], align 1
+
+define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND37_PREHEADER_PREHEADER:%.*]]
+; CHECK:       for.cond33.preheader.preheader:
+; CHECK-NEXT:    br label [[FOR_COND33_PREHEADER:%.*]]
+; CHECK:       for.cond33.preheader:
+; CHECK-NEXT:    [[I_011:%.*]] = phi i16 [ [[INC69:%.*]], [[FOR_END67:%.*]] ], [ 0, [[FOR_COND33_PREHEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_BODY42_SPLIT1:%.*]]
+; CHECK:       for.body42.preheader:
+; CHECK-NEXT:    br label [[FOR_BODY42:%.*]]
+; CHECK:       for.cond37.preheader.preheader:
+; CHECK-NEXT:    br label [[FOR_COND37_PREHEADER:%.*]]
+; CHECK:       for.cond37.preheader:
+; CHECK-NEXT:    [[J_010:%.*]] = phi i16 [ [[INC66:%.*]], [[FOR_END64:%.*]] ], [ 0, [[FOR_COND37_PREHEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_BODY42_PREHEADER:%.*]]
+; CHECK:       for.body42:
+; CHECK-NEXT:    [[K_09:%.*]] = phi i16 [ [[TMP1:%.*]], [[FOR_BODY42_SPLIT:%.*]] ], [ -512, [[FOR_BODY42_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_COND33_PREHEADER_PREHEADER]]
+; CHECK:       for.body42.split1:
+; CHECK-NEXT:    [[SUB51:%.*]] = add nsw i16 [[K_09]], 512
+; CHECK-NEXT:    [[ARRAYIDX55:%.*]] = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 [[SUB51]], i16 [[J_010]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX55]], align 1
+; CHECK-NEXT:    [[ADD61:%.*]] = add i32 undef, undef
+; CHECK-NEXT:    [[INC63:%.*]] = add nsw i16 [[K_09]], 1
+; CHECK-NEXT:    br label [[FOR_END67]]
+; CHECK:       for.body42.split:
+; CHECK-NEXT:    [[ADD61_LCSSA:%.*]] = phi i32 [ [[ADD61]], [[FOR_END67]] ]
+; CHECK-NEXT:    [[TMP1]] = add nsw i16 [[K_09]], 1
+; CHECK-NEXT:    br i1 true, label [[FOR_END64]], label [[FOR_BODY42]]
+; CHECK:       for.end64:
+; CHECK-NEXT:    [[ADD61_LCSSA_LCSSA:%.*]] = phi i32 [ [[ADD61_LCSSA]], [[FOR_BODY42_SPLIT]] ]
+; CHECK-NEXT:    store i32 [[ADD61_LCSSA_LCSSA]], ptr undef, align 1
+; CHECK-NEXT:    [[INC66]] = add nuw nsw i16 [[J_010]], 1
+; CHECK-NEXT:    br i1 true, label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]]
+; CHECK:       for.end67:
+; CHECK-NEXT:    [[INC69]] = add nuw nsw i16 [[I_011]], 1
+; CHECK-NEXT:    [[EXITCOND13_NOT:%.*]] = icmp eq i16 [[INC69]], 2
+; CHECK-NEXT:    br i1 [[EXITCOND13_NOT]], label [[FOR_BODY42_SPLIT]], label [[FOR_COND33_PREHEADER]]
+; CHECK:       for.cond75.preheader:
+; CHECK-NEXT:    br label [[FOR_COND75:%.*]]
+; CHECK:       for.cond75:
+; CHECK-NEXT:    br label [[FOR_COND75]]
+;
+entry:
+  br label %for.cond33.preheader
+
+for.cond33.preheader: ; preds = %for.end67, %entry
+  %i.011 = phi i16 [ 0, %entry ], [ %inc69, %for.end67 ]
+  br label %for.cond37.preheader
+
+for.cond37.preheader: ; preds = %for.end64, %for.cond33.preheader
+  %j.010 = phi i16 [ 0, %for.cond33.preheader ], [ %inc66, %for.end64 ]
+  br label %for.body42
+
+for.body42: ; preds = %for.body42, %for.cond37.preheader
+  %k.09 = phi i16 [ -512, %for.cond37.preheader ], [ %inc63, %for.body42 ]
+  %sub51 = add nsw i16 %k.09, 512
+  %arrayidx55 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %sub51, i16 %j.010
+  %0 = load i32, ptr %arrayidx55, align 1
+  %add61 = add i32 undef, undef ; defined inside the innermost (self-looping) block, used by the store in for.end64
+  %inc63 = add nsw i16 %k.09, 1
+  br i1 true, label %for.end64, label %for.body42
+
+for.end64: ; preds = %for.body42
+  store i32 %add61, ptr undef, align 1 ; use of %add61 outside its defining loop; the CHECK lines expect LCSSA phis feeding this store
+  %inc66 = add nuw nsw i16 %j.010, 1
+  br i1 true, label %for.end67, label %for.cond37.preheader
+
+for.end67: ; preds = %for.end64
+  %inc69 = add nuw nsw i16 %i.011, 1
+  %exitcond13.not = icmp eq i16 %inc69, 2
+  br i1 %exitcond13.not, label %for.cond75, label %for.cond33.preheader
+
+for.cond75: ; preds = %for.cond75, %for.end67
+  br label %for.cond75
+}
+
+
+; Make sure that we split the phi nodes in the middle loop header
+; into a separate basic block to avoid the situation where use of
+; the outermost indvar appears before its def after interchanging
+; the outermost and the middle loop. Otherwise loop interchange
+; would crash.
+
+define void @test2() {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND37_PREHEADER_PREHEADER:%.*]]
+; CHECK:       for.cond33.preheader.preheader:
+; CHECK-NEXT:    br label [[FOR_COND33_PREHEADER:%.*]]
+; CHECK:       for.cond33.preheader:
+; CHECK-NEXT:    [[I_166:%.*]] = phi i16 [ [[INC69:%.*]], [[FOR_INC68:%.*]] ], [ 0, [[FOR_COND33_PREHEADER_PREHEADER:%.*]] ]
+; CHECK-NEXT:    [[ARRAYIDX60:%.*]] = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 [[I_166]], i16 [[J_165:%.*]]
+; CHECK-NEXT:    br label [[VECTOR_BODY85_SPLIT1:%.*]]
+; CHECK:       for.cond37.preheader.preheader:
+; CHECK-NEXT:    br label [[FOR_COND37_PREHEADER:%.*]]
+; CHECK:       for.cond37.preheader:
+; CHECK-NEXT:    [[J_165]] = phi i16 [ [[INC66:%.*]], [[MIDDLE_BLOCK80:%.*]] ], [ 0, [[FOR_COND37_PREHEADER_PREHEADER]] ]
+; CHECK-NEXT:    br label [[FOR_COND37_PREHEADER_SPLIT:%.*]]
+; CHECK:       for.cond37.preheader.split:
+; CHECK-NEXT:    br label [[VECTOR_BODY85:%.*]]
+; CHECK:       vector.body85:
+; CHECK-NEXT:    [[INDEX86:%.*]] = phi i16 [ 0, [[FOR_COND37_PREHEADER_SPLIT]] ], [ [[TMP3:%.*]], [[VECTOR_BODY85_SPLIT:%.*]] ]
+; CHECK-NEXT:    br label [[FOR_COND33_PREHEADER_PREHEADER]]
+; CHECK:       vector.body85.split1:
+; CHECK-NEXT:    [[TMP0:%.*]] = or i16 [[INDEX86]], 2
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 [[TMP0]], i16 [[J_165]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 1
+; CHECK-NEXT:    [[INDEX_NEXT87:%.*]] = add nuw i16 [[INDEX86]], 4
+; CHECK-NEXT:    br label [[FOR_INC68]]
+; CHECK:       vector.body85.split:
+; CHECK-NEXT:    [[TMP3]] = add nuw i16 [[INDEX86]], 4
+; CHECK-NEXT:    br i1 true, label [[MIDDLE_BLOCK80]], label [[VECTOR_BODY85]]
+; CHECK:       middle.block80:
+; CHECK-NEXT:    [[INC66]] = add nuw nsw i16 [[J_165]], 1
+; CHECK-NEXT:    br i1 true, label [[FOR_COND75_PREHEADER:%.*]], label [[FOR_COND37_PREHEADER]]
+; CHECK:       for.inc68:
+; CHECK-NEXT:    [[INC69]] = add nuw nsw i16 [[I_166]], 1
+; CHECK-NEXT:    [[EXITCOND77_NOT:%.*]] = icmp eq i16 [[INC69]], 2
+; CHECK-NEXT:    br i1 [[EXITCOND77_NOT]], label [[VECTOR_BODY85_SPLIT]], label [[FOR_COND33_PREHEADER]]
+; CHECK:       for.cond75.preheader:
+; CHECK-NEXT:    unreachable
+;
+entry:
+  br label %for.cond33.preheader
+
+for.cond33.preheader: ; preds = %for.inc68, %entry
+  %i.166 = phi i16 [ %inc69, %for.inc68 ], [ 0, %entry ]
+  br label %for.cond37.preheader
+
+for.cond37.preheader: ; preds = %middle.block80, %for.cond33.preheader
+  %j.165 = phi i16 [ 0, %for.cond33.preheader ], [ %inc66, %middle.block80 ]
+  %arrayidx60 = getelementptr inbounds [2 x [4 x i32]], ptr @c, i16 0, i16 %i.166, i16 %j.165 ; non-phi use of the outer indvar %i.166 in the middle loop header
+  br label %vector.body85
+
+vector.body85: ; preds = %vector.body85, %for.cond37.preheader
+  %index86 = phi i16 [ 0, %for.cond37.preheader ], [ %index.next87, %vector.body85 ]
+  %0 = or i16 %index86, 2
+  %1 = getelementptr inbounds [512 x [4 x i32]], ptr @b, i16 0, i16 %0, i16 %j.165
+  %2 = load i32, ptr %1, align 1
+  %index.next87 = add nuw i16 %index86, 4
+  br i1 true, label %middle.block80, label %vector.body85 ; constant condition (not undef), matching the 'br i1 true' in the CHECK lines
+
+middle.block80: ; preds = %vector.body85
+  %inc66 = add nuw nsw i16 %j.165, 1
+  br i1 true, label %for.inc68, label %for.cond37.preheader ; constant condition (not undef), matching the 'br i1 true' in the CHECK lines
+
+for.inc68: ; preds = %middle.block80
+  %inc69 = add nuw nsw i16 %i.166, 1
+  %exitcond77.not = icmp eq i16 %inc69, 2
+  br i1 %exitcond77.not, label %for.cond75.preheader, label %for.cond33.preheader
+
+for.cond75.preheader: ; preds = %for.inc68
+  unreachable
+}
+