diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp --- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp @@ -397,10 +397,8 @@ public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, LoopInfo *LI, DominatorTree *DT, - BasicBlock *LoopNestExit, const LoopInterchangeLegality &LIL) - : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), - LoopExit(LoopNestExit), LIL(LIL) {} + : OuterLoop(Outer), InnerLoop(Inner), SE(SE), LI(LI), DT(DT), LIL(LIL) {} /// Interchange OuterLoop and InnerLoop. bool transform(); @@ -421,7 +419,6 @@ LoopInfo *LI; DominatorTree *DT; - BasicBlock *LoopExit; const LoopInterchangeLegality &LIL; }; @@ -523,7 +520,7 @@ Loop *LoopToBeInterchanged = LoopList[SelecLoopId]; for (unsigned i = SelecLoopId; i > 0; i--) { bool Interchanged = processLoop(LoopToBeInterchanged, LoopList[i - 1], i, - i - 1, LoopNestExit, DependencyMatrix); + i - 1, DependencyMatrix); if (!Interchanged) return Changed; // Update the DependencyMatrix @@ -538,7 +535,7 @@ } bool processLoop(Loop *InnerLoop, Loop *OuterLoop, unsigned InnerLoopId, - unsigned OuterLoopId, BasicBlock *LoopNestExit, + unsigned OuterLoopId, std::vector> &DependencyMatrix) { LLVM_DEBUG(dbgs() << "Processing InnerLoopId = " << InnerLoopId << " and OuterLoopId = " << OuterLoopId << "\n"); @@ -561,8 +558,7 @@ << "Loop interchanged with enclosing loop."; }); - LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LoopNestExit, - LIL); + LoopInterchangeTransform LIT(OuterLoop, InnerLoop, SE, LI, DT, LIL); LIT.transform(); LLVM_DEBUG(dbgs() << "Loops interchanged.\n"); LoopsInterchanged++; @@ -1551,9 +1547,12 @@ InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false); // The outer loop header might or might not branch to the outer latch. // We are guaranteed to branch to the inner loop preheader. - if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) - updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates, + if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) { + // In this case the outerLoopHeader should branch to the InnerLoopLatch. + updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch, + DTUpdates, /*MustUpdateOnce=*/false); + } updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader, InnerLoopHeaderSuccessor, DTUpdates, /*MustUpdateOnce=*/false); diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll --- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll +++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll @@ -25,12 +25,12 @@ ; CHECK-NEXT: [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ] ; CHECK-NEXT: [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0 ; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64 -; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]] +; CHECK-NEXT: br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[INNER_FOR_BODY_SPLIT:%.*]] ; CHECK: inner.preheader: ; CHECK-NEXT: [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64 ; CHECK-NEXT: br label [[INNER_FOR_BODY:%.*]] ; CHECK: inner.for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT:%.*]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT]] ] ; CHECK-NEXT: br label [[OUTER_PREHEADER]] ; CHECK: inner.for.body.split1: ; CHECK-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* [[TEMP]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV27]] @@ -41,10 +41,10 @@ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]] ; CHECK-NEXT: br label [[INNER_CRIT_EDGE:%.*]] ; CHECK: inner.for.body.split: -; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ] +; CHECK-NEXT: [[WIDE_TRIP_COUNT_LCSSA:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ], [ [[WIDE_TRIP_COUNT]], [[OUTER_HEADER]] ] ; CHECK-NEXT: [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]] -; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]] +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA]] +; CHECK-NEXT: br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE:%.*]] ; CHECK: inner.crit_edge: ; CHECK-NEXT: br label [[OUTER_LATCH]] ; CHECK: outer.latch: diff --git a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll @@ -0,0 +1,202 @@ +; RUN: opt -basic-aa -loop-interchange -verify-dom-info -verify-loop-info -verify-loop-lcssa -S %s | FileCheck %s + +@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2 +@a = common global i32 0, align 4 +@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4 + + +; Doubly nested loop +;; C test case: +;; int a; +;; short b[3][5][8] = {{}, {{}, 0, 0, 0, 6, 1, 6}}; +;; void test1() { +;; int c = 0, d; +;; for (; c <= 2; c++) { +;; if (c) +;; continue; +;; d = 0; +;; for (; d <= 2; d++) +;; a |= b[d][d][c + 5]; +;; } +;; } + +define void @test1() { +;CHECK-LABEL: @test1( +;CHECK: entry: +;CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]] +;CHECK: for.body.preheader: +;CHECK-NEXT: br label [[FOR_BODY:%.*]] +;CHECK: for.body: +;CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ [[INDVARS_IV_NEXT23:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ] +;CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[INDVARS_IV22:%.*]], 0 +;CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY3_SPLIT1:%.*]], label [[FOR_BODY3_SPLIT:%.*]] +;CHECK: for.cond1.preheader: +;CHECK-NEXT: br label [[FOR_BODY3:%.*]] +;CHECK: for.body3: +;CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ %3, [[FOR_BODY3_SPLIT]] ] +;CHECK-NEXT: br label [[FOR_BODY_PREHEADER]] +;CHECK: for.body3.split1: +;CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 5 +;CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV]], i64 [[TMP0]] +;CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX7]] +;CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32 +;CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a +;CHECK-NEXT: [[TMP_OR:%.*]] = or i32 [[TMP2]], [[CONV]] +;CHECK-NEXT: store i32 [[TMP_OR]], i32* @a +;CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +;CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3 +;CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT:%.*]] +;CHECK: for.body3.split: +;CHECK-NEXT: [[TMP3:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1 +;CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 3 +;CHECK-NEXT: br i1 %4, label [[FOR_BODY3]], label [[FOR_END10:%.*]] +;CHECK: for.inc8.loopexit: +;CHECK-NEXT: br label [[FOR_INC8]] +;CHECK: for.inc8: +;CHECK-NEXT: [[INDVARS_IV_NEXT23]] = add nuw nsw i64 [[INDVARS_IV22]], 1 +;CHECK-NEXT: [[EXITCOND25:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT23]], 3 +;CHECK-NEXT: br i1 [[EXITCOND25]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]] +;CHECK: for.end10: +;CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @a +;CHECK-NEXT: ret void + +entry: + br label %for.body + +for.body: ; preds = %entry, %for.inc8 + %indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23, %for.inc8 ] + %tobool = icmp eq i64 %indvars.iv22, 0 + br i1 %tobool, label %for.cond1.preheader, label %for.inc8 + +for.cond1.preheader: ; preds = %for.body + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ] + %0 = add nuw nsw i64 %indvars.iv22, 5 + %arrayidx7 = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 %indvars.iv, i64 %indvars.iv, i64 %0 + %1 = load i16, i16* %arrayidx7 + %conv = sext i16 %1 to i32 + %2 = load i32, i32* @a + %or = or i32 %2, %conv + store i32 %or, i32* @a + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, 3 + br i1 %exitcond, label %for.body3, label %for.inc8.loopexit + +for.inc8.loopexit: ; preds = %for.body3 + br label %for.inc8 + +for.inc8: ; preds = %for.inc8.loopexit, %for.body + %indvars.iv.next23 = add nuw nsw i64 %indvars.iv22, 1 + %exitcond25 = icmp ne i64 %indvars.iv.next23, 3 + br i1 %exitcond25, label %for.body, label %for.end10 + +for.end10: ; preds = %for.inc8 + %3 = load i32, i32* @a + ret void +} + +; Triply nested loop +; The innermost and the middle loop are interchanged. +; C test case: +;; a; +;; d[][6]; +;; void test2() { +;; int g = 10; +;; for (; g; g = g - 5) { +;; short c = 4; +;; for (; c; c--) { +;; int i = 4; +;; for (; i; i--) { +;; if (a) +;; break; +;; d[i][c] = 0; +;; } +;; } +;; } +;; } + +define void @test2() { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTERMOST_HEADER:%.*]] +; CHECK: outermost.header: +; CHECK-NEXT: [[INDVAR_OUTERMOST:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[INDVAR_OUTERMOST_NEXT:%.*]], [[OUTERMOST_LATCH:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4 +; CHECK-NEXT: [[TOBOOL71_I:%.*]] = icmp eq i32 [[TMP0]], 0 +; CHECK-NEXT: br label [[INNERMOST_PREHEADER:%.*]] +; CHECK: middle.header.preheader: +; CHECK-NEXT: br label [[MIDDLE_HEADER:%.*]] +; CHECK: middle.header: +; CHECK-NEXT: [[INDVAR_MIDDLE:%.*]] = phi i64 [ [[INDVAR_MIDDLE_NEXT:%.*]], [[MIDDLE_LATCH:%.*]] ], [ 4, [[MIDDLE_HEADER_PREHEADER:%.*]] ] +; CHECK-NEXT: br i1 [[TOBOOL71_I]], label [[INNERMOST_BODY_SPLIT1:%.*]], label [[INNERMOST_BODY_SPLIT:%.*]] +; CHECK: innermost.preheader: +; CHECK-NEXT: br label [[INNERMOST_BODY:%.*]] +; CHECK: innermost.body: +; CHECK-NEXT: [[INDVAR_INNERMOST:%.*]] = phi i64 [ [[TMP1:%.*]], [[INNERMOST_BODY_SPLIT]] ], [ 4, [[INNERMOST_PREHEADER]] ] +; CHECK-NEXT: br label [[MIDDLE_HEADER_PREHEADER]] +; CHECK: innermost.body.split1: +; CHECK-NEXT: [[ARRAYIDX9_I:%.*]] = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 [[INDVAR_INNERMOST]], i64 [[INDVAR_MIDDLE]] +; CHECK-NEXT: store i32 0, i32* [[ARRAYIDX9_I]], align 4 +; CHECK-NEXT: [[INDVAR_INNERMOST_NEXT:%.*]] = add nsw i64 [[INDVAR_INNERMOST]], -1 +; CHECK-NEXT: [[TOBOOL5_I:%.*]] = icmp eq i64 [[INDVAR_INNERMOST_NEXT]], 0 +; CHECK-NEXT: br label [[MIDDLE_LATCH_LOOPEXIT:%.*]] +; CHECK: innermost.body.split: +; CHECK-NEXT: [[TMP1]] = add nsw i64 [[INDVAR_INNERMOST]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label [[OUTERMOST_LATCH]], label [[INNERMOST_BODY]] +; CHECK: innermost.loopexit: +; CHECK-NEXT: br label [[MIDDLE_LATCH]] +; CHECK: middle.latch: +; CHECK-NEXT: [[INDVAR_MIDDLE_NEXT]] = add nsw i64 [[INDVAR_MIDDLE]], -1 +; CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp eq i64 [[INDVAR_MIDDLE_NEXT]], 0 +; CHECK-NEXT: br i1 [[TOBOOL2_I]], label [[INNERMOST_BODY_SPLIT]], label [[MIDDLE_HEADER]] +; CHECK: outermost.latch: +; CHECK-NEXT: [[INDVAR_OUTERMOST_NEXT]] = add nsw i32 [[INDVAR_OUTERMOST]], -5 +; CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i32 [[INDVAR_OUTERMOST_NEXT]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_I]], label [[OUTERMOST_EXIT:%.*]], label [[OUTERMOST_HEADER]] +; CHECK: outermost.exit: +; CHECK-NEXT: ret void +; + +entry: + br label %outermost.header + +outermost.header: ; preds = %outermost.latch, %entry + %indvar.outermost = phi i32 [ 10, %entry ], [ %indvar.outermost.next, %outermost.latch ] + %0 = load i32, i32* @a, align 4 + %tobool71.i = icmp eq i32 %0, 0 + br label %middle.header + +middle.header: ; preds = %middle.latch, %outermost.header + %indvar.middle = phi i64 [ 4, %outermost.header ], [ %indvar.middle.next, %middle.latch ] + br i1 %tobool71.i, label %innermost.preheader, label %middle.latch + +innermost.preheader: ; preds = %middle.header + br label %innermost.body + +innermost.body: ; preds = %innermost.preheader, %innermost.body + %indvar.innermost = phi i64 [ %indvar.innermost.next, %innermost.body ], [ 4, %innermost.preheader ] + %arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvar.innermost, i64 %indvar.middle + store i32 0, i32* %arrayidx9.i, align 4 + %indvar.innermost.next = add nsw i64 %indvar.innermost, -1 + %tobool5.i = icmp eq i64 %indvar.innermost.next, 0 + br i1 %tobool5.i, label %innermost.loopexit, label %innermost.body + +innermost.loopexit: ; preds = %innermost.body + br label %middle.latch + +middle.latch: ; preds = %middle.latch.loopexit, %middle.header + %indvar.middle.next = add nsw i64 %indvar.middle, -1 + %tobool2.i = icmp eq i64 %indvar.middle.next, 0 + br i1 %tobool2.i, label %outermost.latch, label %middle.header + +outermost.latch: ; preds = %middle.latch + %indvar.outermost.next = add nsw i32 %indvar.outermost, -5 + %tobool.i = icmp eq i32 %indvar.outermost.next, 0 + br i1 %tobool.i, label %outermost.exit, label %outermost.header + +outermost.exit: ; preds = %outermost.latch + ret void +}