diff --git a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
--- a/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -1534,9 +1534,12 @@
                   InnerLoopPreHeader, DTUpdates, /*MustUpdateOnce=*/false);
   // The outer loop header might or might not branch to the outer latch.
   // We are guaranteed to branch to the inner loop preheader.
-  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch))
-    updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, LoopExit, DTUpdates,
+  if (llvm::is_contained(OuterLoopHeaderBI->successors(), OuterLoopLatch)) {
+    // If it does, that edge must be redirected to the inner loop latch.
+    updateSuccessor(OuterLoopHeaderBI, OuterLoopLatch, InnerLoopLatch,
+                    DTUpdates,
                     /*MustUpdateOnce=*/false);
+  }
   updateSuccessor(OuterLoopHeaderBI, InnerLoopPreHeader,
                   InnerLoopHeaderSuccessor, DTUpdates,
                   /*MustUpdateOnce=*/false);
diff --git a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
--- a/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
+++ b/llvm/test/Transforms/LoopInterchange/lcssa-preheader.ll
@@ -25,12 +25,12 @@
 ; CHECK-NEXT:    [[INDVARS_IV27:%.*]] = phi i64 [ 0, [[OUTER_PREHEADER:%.*]] ], [ [[INDVARS_IV_NEXT28:%.*]], [[OUTER_LATCH:%.*]] ]
 ; CHECK-NEXT:    [[CMP222:%.*]] = icmp sgt i32 [[M:%.*]], 0
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[M]] to i64
-; CHECK-NEXT:    br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[OUTER_CRIT_EDGE:%.*]]
+; CHECK-NEXT:    br i1 [[CMP222]], label [[INNER_FOR_BODY_SPLIT1:%.*]], label [[INNER_FOR_BODY_SPLIT:%.*]]
 ; CHECK:       inner.preheader:
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT29:%.*]] = zext i32 [[N]] to i64
 ; CHECK-NEXT:    br label [[INNER_FOR_BODY:%.*]]
 ; CHECK:       inner.for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT:%.*]] ]
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[INNER_PREHEADER]] ], [ [[TMP1:%.*]], [[INNER_FOR_BODY_SPLIT]] ]
 ; CHECK-NEXT:    br label [[OUTER_PREHEADER]]
 ; CHECK:       inner.for.body.split1:
 ; CHECK-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [16 x [16 x i32]], [16 x [16 x i32]]* [[TEMP]], i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV27]]
@@ -41,10 +41,10 @@
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
 ; CHECK-NEXT:    br label [[INNER_CRIT_EDGE:%.*]]
 ; CHECK:       inner.for.body.split:
-; CHECK-NEXT:    [[WIDE_TRIP_COUNT_LCSSA2:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ]
+; CHECK-NEXT:    [[WIDE_TRIP_COUNT_LCSSA:%.*]] = phi i64 [ [[WIDE_TRIP_COUNT]], [[OUTER_LATCH]] ], [ [[WIDE_TRIP_COUNT]], [[OUTER_HEADER]] ]
 ; CHECK-NEXT:    [[TMP1]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA2]]
-; CHECK-NEXT:    br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE]]
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[TMP1]], [[WIDE_TRIP_COUNT_LCSSA]]
+; CHECK-NEXT:    br i1 [[TMP2]], label [[INNER_FOR_BODY]], label [[OUTER_CRIT_EDGE:%.*]]
 ; CHECK:       inner.crit_edge:
 ; CHECK-NEXT:    br label [[OUTER_LATCH]]
 ; CHECK:       outer.latch:
diff --git a/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopInterchange/outer-header-jump-to-inner-latch.ll
@@ -0,0 +1,172 @@
+; RUN: opt -loop-interchange -S %s | FileCheck %s
+
+@b = global [3 x [5 x [8 x i16]]] [[5 x [8 x i16]] zeroinitializer, [5 x [8 x i16]] [[8 x i16] zeroinitializer, [8 x i16] [i16 0, i16 0, i16 0, i16 6, i16 1, i16 6, i16 0, i16 0], [8 x i16] zeroinitializer, [8 x i16] zeroinitializer, [8 x i16] zeroinitializer], [5 x [8 x i16]] zeroinitializer], align 2
+@a = common global i32 0, align 4
+@.str = private constant [4 x i8] c"%d\0A\00", align 1
+
+
+@e = common dso_local local_unnamed_addr global i32 0, align 4
+@d = common dso_local local_unnamed_addr global [1 x [6 x i32]] zeroinitializer, align 4
+
+; Doubly nested loop: the outer header (for.body) conditionally skips the inner
+; loop and branches straight to the outer latch (for.inc8).
+define i32 @test1() {
+
+;CHECK: entry:
+;CHECK-NEXT: br label [[FOR_COND1_PREHEADER:%.*]]
+;CHECK: for.body.preheader:
+;CHECK-NEXT: br label [[FOR_BODY:%.*]]
+;CHECK: for.body:
+;CHECK-NEXT: [[INDVARS_IV22:%.*]] = phi i64 [ [[INDVARS_IV_NEXT23:%.*]], [[FOR_INC8:%.*]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
+;CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i64 [[INDVARS_IV22]], 0
+;CHECK-NEXT: br i1 [[TOBOOL]], label [[FOR_BODY3_SPLIT1:%.*]], label [[FOR_BODY3_SPLIT:%.*]]
+;CHECK: for.cond1.preheader:
+;CHECK-NEXT: br label [[FOR_BODY3:%.*]]
+;CHECK: for.body3:
+;CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_COND1_PREHEADER]] ], [ [[TMP3:%.*]], [[FOR_BODY3_SPLIT]] ]
+;CHECK-NEXT: br label [[FOR_BODY_PREHEADER]]
+;CHECK: for.body3.split1:
+;CHECK-NEXT: [[TMP0:%.*]] = add nuw nsw i64 [[INDVARS_IV22]], 5
+;CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 [[INDVARS_IV]], i64 [[INDVARS_IV]], i64 [[TMP0]]
+;CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX7]]
+;CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+;CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a
+;CHECK-NEXT: [[TMP_OR:%.*]] = or i32 [[TMP2]], [[CONV]]
+;CHECK-NEXT: store i32 [[TMP_OR]], i32* @a
+;CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = add nuw nsw i64 [[INDVARS_IV]], 1
+;CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], 3
+;CHECK-NEXT: br label [[FOR_INC8_LOOPEXIT:%.*]]
+;CHECK: for.body3.split:
+;CHECK-NEXT: [[TMP3]] = add nuw nsw i64 [[INDVARS_IV]], 1
+;CHECK-NEXT: [[TMP4:%.*]] = icmp ne i64 [[TMP3]], 3
+;CHECK-NEXT: br i1 [[TMP4]], label [[FOR_BODY3]], label [[FOR_END10:%.*]]
+;CHECK: for.inc8.loopexit:
+;CHECK-NEXT: br label [[FOR_INC8]]
+;CHECK: for.inc8:
+;CHECK-NEXT: [[INDVARS_IV_NEXT23]] = add nuw nsw i64 [[INDVARS_IV22]], 1
+;CHECK-NEXT: [[EXITCOND25:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT23]], 3
+;CHECK-NEXT: br i1 [[EXITCOND25]], label [[FOR_BODY]], label [[FOR_BODY3_SPLIT]]
+;CHECK: for.end10:
+;CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* @a
+;CHECK-NEXT: ret i32 0
+
+entry:
+  br label %for.body
+
+for.body: ; preds = %entry, %for.inc8
+  %indvars.iv22 = phi i64 [ 0, %entry ], [ %indvars.iv.next23, %for.inc8 ]
+  %tobool = icmp eq i64 %indvars.iv22, 0
+  br i1 %tobool, label %for.cond1.preheader, label %for.inc8
+
+for.cond1.preheader: ; preds = %for.body
+  br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+  %indvars.iv = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next, %for.body3 ]
+  %0 = add nuw nsw i64 %indvars.iv22, 5
+  %arrayidx7 = getelementptr inbounds [3 x [5 x [8 x i16]]], [3 x [5 x [8 x i16]]]* @b, i64 0, i64 %indvars.iv, i64 %indvars.iv, i64 %0
+  %1 = load i16, i16* %arrayidx7
+  %conv = sext i16 %1 to i32
+  %2 = load i32, i32* @a
+  %or = or i32 %2, %conv
+  store i32 %or, i32* @a
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond = icmp ne i64 %indvars.iv.next, 3
+  br i1 %exitcond, label %for.body3, label %for.inc8.loopexit
+
+for.inc8.loopexit: ; preds = %for.body3
+  br label %for.inc8
+
+for.inc8: ; preds = %for.inc8.loopexit, %for.body
+  %indvars.iv.next23 = add nuw nsw i64 %indvars.iv22, 1
+  %exitcond25 = icmp ne i64 %indvars.iv.next23, 3
+  br i1 %exitcond25, label %for.body, label %for.end10
+
+for.end10: ; preds = %for.inc8
+  %3 = load i32, i32* @a
+  ret i32 0
+}
+
+; Triply nested loop; the middle header can jump straight to for.inc11.i.
+define internal fastcc void @test2() unnamed_addr {
+;CHECK: entry:
+;CHECK: br label [[FOR_COND4_PREHEADER_LR_PH_I:%.*]]
+;CHECK: for.cond4.preheader.lr.ph.i:
+;CHECK-NEXT: [[G_ADDR_05_I:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[SUB_I:%.*]], [[FOR_END13_I:%.*]] ]
+;CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
+;CHECK-NEXT: [[TOBOOL71_I:%.*]] = icmp eq i32 [[TMP0]], 0
+;CHECK-NEXT: br label [[IF_END_I_PREHEADER:%.*]]
+;CHECK: for.cond4.preheader.i.preheader:
+;CHECK-NEXT: br label [[FOR_COND4_PREHEADER_I:%.*]]
+;CHECK: for.cond4.preheader.i:
+;CHECK-NEXT: [[INDVARS_IV8_I:%.*]] = phi i64 [ [[INDVARS_IV_NEXT9_I:%.*]], [[FOR_INC11_I:%.*]] ], [ 4, [[FOR_COND4_PREHEADER_I_PREHEADER:%.*]] ]
+;CHECK-NEXT: br i1 [[TOBOOL71_I]], label [[IF_END_I_SPLIT1:%.*]], label [[IF_END_I_SPLIT:%.*]]
+;CHECK: if.end.i.preheader:
+;CHECK-NEXT: br label [[IF_END_I:%.*]]
+;CHECK: if.end.i:
+;CHECK-NEXT: [[INDVARS_IV_I:%.*]] = phi i64 [ [[TMP1:%.*]], [[IF_END_I_SPLIT]] ], [ 4, [[IF_END_I_PREHEADER]] ]
+;CHECK-NEXT: br label [[FOR_COND4_PREHEADER_I_PREHEADER]]
+;CHECK: if.end.i.split1:
+;CHECK-NEXT: [[ARRAYIDX9_I:%.*]] = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 [[INDVARS_IV_I]], i64 [[INDVARS_IV8_I]]
+;CHECK-NEXT: store i32 0, i32* [[ARRAYIDX9_I]], align 4
+;CHECK-NEXT: [[INDVARS_IV_NEXT_I:%.*]] = add nsw i64 [[INDVARS_IV_I]], -1
+;CHECK-NEXT: [[TOBOOL5_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT_I]], 0
+;CHECK-NEXT: br label [[FOR_INC11_I_LOOPEXIT_LOOPEXIT:%.*]]
+;CHECK: if.end.i.split:
+;CHECK-NEXT: [[TMP1]] = add nsw i64 [[INDVARS_IV_I]], -1
+;CHECK-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0
+;CHECK-NEXT: br i1 [[TMP2]], label [[FOR_END13_I]], label [[IF_END_I]]
+;CHECK: for.inc11.i.loopexit:
+;CHECK-NEXT: br label [[FOR_INC11_I]]
+;CHECK: for.inc11.i: ; preds = [[FOR_INC11_I_LOOPEXIT_LOOPEXIT]]
+;CHECK-NEXT: [[INDVARS_IV_NEXT9_I]] = add nsw i64 [[INDVARS_IV8_I]], -1
+;CHECK-NEXT: [[TOBOOL2_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT9_I]], 0
+;CHECK-NEXT: br i1 [[TOBOOL2_I]], label [[IF_END_I_SPLIT]], label [[FOR_COND4_PREHEADER_I]]
+;CHECK: for.end13.i:
+;CHECK-NEXT: [[SUB_I]] = add nsw i32 [[G_ADDR_05_I]], -5
+;CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp eq i32 [[SUB_I]], 0
+;CHECK-NEXT: br i1 [[TOBOOL_I]], label [[F_EXIT:%.*]], label [[FOR_COND4_PREHEADER_LR_PH_I]]
+;CHECK: f.exit:
+;CHECK-NEXT: ret void
+
+
+entry:
+  br label %for.cond4.preheader.lr.ph.i
+
+for.cond4.preheader.lr.ph.i: ; preds = %for.end13.i, %entry
+  %g.addr.05.i = phi i32 [ 10, %entry ], [ %sub.i, %for.end13.i ]
+  %0 = load i32, i32* @a, align 4
+  %tobool71.i = icmp eq i32 %0, 0
+  br label %for.cond4.preheader.i
+
+for.cond4.preheader.i: ; preds = %for.inc11.i, %for.cond4.preheader.lr.ph.i
+  %indvars.iv8.i = phi i64 [ 4, %for.cond4.preheader.lr.ph.i ], [ %indvars.iv.next9.i, %for.inc11.i ]
+  br i1 %tobool71.i, label %if.end.i.preheader, label %for.inc11.i
+
+if.end.i.preheader: ; preds = %for.cond4.preheader.i
+  br label %if.end.i
+
+if.end.i: ; preds = %if.end.i.preheader, %if.end.i
+  %indvars.iv.i = phi i64 [ %indvars.iv.next.i, %if.end.i ], [ 4, %if.end.i.preheader ]
+  %arrayidx9.i = getelementptr inbounds [1 x [6 x i32]], [1 x [6 x i32]]* @d, i64 0, i64 %indvars.iv.i, i64 %indvars.iv8.i
+  store i32 0, i32* %arrayidx9.i, align 4
+  %indvars.iv.next.i = add nsw i64 %indvars.iv.i, -1
+  %tobool5.i = icmp eq i64 %indvars.iv.next.i, 0
+  br i1 %tobool5.i, label %for.inc11.i.loopexit, label %if.end.i
+
+for.inc11.i.loopexit: ; preds = %if.end.i
+  br label %for.inc11.i
+
+for.inc11.i: ; preds = %for.inc11.i.loopexit, %for.cond4.preheader.i
+  %indvars.iv.next9.i = add nsw i64 %indvars.iv8.i, -1
+  %tobool2.i = icmp eq i64 %indvars.iv.next9.i, 0
+  br i1 %tobool2.i, label %for.end13.i, label %for.cond4.preheader.i
+
+for.end13.i: ; preds = %for.inc11.i
+  %sub.i = add nsw i32 %g.addr.05.i, -5
+  %tobool.i = icmp eq i32 %sub.i, 0
+  br i1 %tobool.i, label %f.exit, label %for.cond4.preheader.lr.ph.i
+
+f.exit: ; preds = %for.end13.i
+  ret void
+}