Index: llvm/lib/Transforms/Scalar/LoopFuse.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopFuse.cpp
+++ llvm/lib/Transforms/Scalar/LoopFuse.cpp
@@ -1536,7 +1536,10 @@
     // Update DT/PDT
     DTU.applyUpdates(TreeUpdates);
+    LI.removeBlock(FC1GuardBlock);
     LI.removeBlock(FC1.Preheader);
+    LI.removeBlock(FC0.ExitBlock);
+
     DTU.deleteBB(FC1GuardBlock);
     DTU.deleteBB(FC1.Preheader);
     DTU.deleteBB(FC0.ExitBlock);
     DTU.flush();
Index: llvm/test/Transforms/LoopFusion/double-loop-nest-inner-guard.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopFusion/double-loop-nest-inner-guard.ll
@@ -0,0 +1,85 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Runs -loop-fusion on two adjacent double loop nests whose inner loops are
+; guarded by the same condition (%cmp261).
+; TODO: Add CHECK rules for the loop structure expected after fusion; the
+; checks below only confirm that opt completes and still emits @foo.
+; CHECK-LABEL: @foo(
+; CHECK: ret i32 undef
+
+@a = dso_local local_unnamed_addr global [10 x [10 x i32]] zeroinitializer
+@b = dso_local local_unnamed_addr global [10 x [10 x i32]] zeroinitializer
+@c = dso_local local_unnamed_addr global [10 x [10 x i32]] zeroinitializer
+
+define dso_local i32 @foo(i32 %m, i32 %n, i32 %M, i32 %N) local_unnamed_addr {
+entry:
+  %cmp63 = icmp sgt i32 %m, 0
+  br i1 %cmp63, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup17
+
+for.cond1.preheader.lr.ph:
+  %cmp261 = icmp sgt i32 %n, 0
+  %wide.trip.count76 = zext i32 %m to i64
+  %wide.trip.count72 = zext i32 %n to i64
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %iv74 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %iv.next75, %for.cond.cleanup3 ]
+  br i1 %cmp261, label %for.body4.preheader, label %for.cond.cleanup3
+
+for.body4.preheader:
+  br label %for.body4
+
+for.body4:
+  %iv70 = phi i64 [ %iv.next71, %for.body4 ], [ 0, %for.body4.preheader ]
+  %idx6 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv74, i64 %iv70
+  %0 = load i32, i32* %idx6
+  %add = add nsw i32 %0, 2
+  %idx10 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @b, i64 0, i64 %iv74, i64 %iv70
+  store i32 %add, i32* %idx10
+  %iv.next71 = add nuw nsw i64 %iv70, 1
+  %exitcond73 = icmp eq i64 %iv.next71, %wide.trip.count72
+  br i1 %exitcond73, label %for.cond.cleanup3.loopexit, label %for.body4
+
+for.cond.cleanup3.loopexit:
+  br label %for.cond.cleanup3
+
+for.cond.cleanup3:
+  %iv.next75 = add nuw nsw i64 %iv74, 1
+  %exitcond77 = icmp eq i64 %iv.next75, %wide.trip.count76
+  br i1 %exitcond77, label %for.cond20.preheader.preheader, label %for.cond1.preheader
+
+for.cond20.preheader.preheader:
+  br label %for.cond20.preheader
+
+for.cond20.preheader:
+  %iv66 = phi i64 [ %iv.next67, %for.cond.cleanup22 ], [ 0, %for.cond20.preheader.preheader ]
+  br i1 %cmp261, label %for.body23.preheader, label %for.cond.cleanup22
+
+for.body23.preheader:
+  br label %for.body23
+
+for.body23:
+  %iv = phi i64 [ %iv.next, %for.body23 ], [ 0, %for.body23.preheader ]
+  %idx27 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @a, i64 0, i64 %iv66, i64 %iv
+  %1 = load i32, i32* %idx27
+  %mul = shl nsw i32 %1, 1
+  %idx31 = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @c, i64 0, i64 %iv66, i64 %iv
+  store i32 %mul, i32* %idx31
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count72
+  br i1 %exitcond, label %for.cond.cleanup22.loopexit, label %for.body23
+
+for.cond.cleanup22.loopexit:
+  br label %for.cond.cleanup22
+
+for.cond.cleanup22:
+  %iv.next67 = add nuw nsw i64 %iv66, 1
+  %exitcond69 = icmp eq i64 %iv.next67, %wide.trip.count76
+  br i1 %exitcond69, label %for.cond.cleanup17.loopexit, label %for.cond20.preheader
+
+for.cond.cleanup17.loopexit:
+  br label %for.cond.cleanup17
+
+for.cond.cleanup17:
+  ret i32 undef
+}
Index: llvm/test/Transforms/LoopFusion/triple-loop-nest-inner-guard.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopFusion/triple-loop-nest-inner-guard.ll
@@ -0,0 +1,117 @@
+; RUN: opt -S -loop-fusion < %s 2>&1 | FileCheck %s
+
+; Runs -loop-fusion on two adjacent triple loop nests whose inner loops are
+; guarded by the same conditions (%cmp298 and %cmp696).
+; TODO: Add CHECK rules for the loop structure expected after fusion; the
+; checks below only confirm that opt completes and still emits @foo.
+; CHECK-LABEL: @foo(
+; CHECK: ret i32 undef
+
+@a = dso_local local_unnamed_addr global [10 x [10 x [10 x i32]]] zeroinitializer
+@b = dso_local local_unnamed_addr global [10 x [10 x [10 x i32]]] zeroinitializer
+@c = dso_local local_unnamed_addr global [10 x [10 x [10 x i32]]] zeroinitializer
+
+define dso_local i32 @foo(i32 %m, i32 %n, i32 %M, i32 %N) local_unnamed_addr {
+entry:
+  %cmp101 = icmp sgt i32 %m, 0
+  br i1 %cmp101, label %for.cond1.preheader.lr.ph, label %for.cond.cleanup28
+
+for.cond1.preheader.lr.ph:
+  %cmp298 = icmp sgt i32 %n, 0
+  %cmp696 = icmp sgt i32 %M, 0
+  %wide.trip.count122 = zext i32 %m to i64
+  %wide.trip.count118 = zext i32 %n to i64
+  %wide.trip.count114 = zext i32 %M to i64
+  br label %for.cond1.preheader
+
+for.cond1.preheader:
+  %iv120 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %iv.next121, %for.cond.cleanup3 ]
+  br i1 %cmp298, label %for.cond5.preheader.preheader, label %for.cond.cleanup3
+
+for.cond5.preheader.preheader:
+  br label %for.cond5.preheader
+
+for.cond5.preheader:
+  %iv116 = phi i64 [ %iv.next117, %for.cond.cleanup7 ], [ 0, %for.cond5.preheader.preheader ]
+  br i1 %cmp696, label %for.body8.preheader, label %for.cond.cleanup7
+
+for.body8.preheader:
+  br label %for.body8
+
+for.cond.cleanup3.loopexit:
+  br label %for.cond.cleanup3
+
+for.cond.cleanup3:
+  %iv.next121 = add nuw nsw i64 %iv120, 1
+  %exitcond123 = icmp eq i64 %iv.next121, %wide.trip.count122
+  br i1 %exitcond123, label %for.cond31.preheader.preheader, label %for.cond1.preheader
+
+for.cond31.preheader.preheader:
+  br label %for.cond31.preheader
+
+for.cond.cleanup7.loopexit:
+  br label %for.cond.cleanup7
+
+for.cond.cleanup7:
+  %iv.next117 = add nuw nsw i64 %iv116, 1
+  %exitcond119 = icmp eq i64 %iv.next117, %wide.trip.count118
+  br i1 %exitcond119, label %for.cond.cleanup3.loopexit, label %for.cond5.preheader
+
+for.body8:
+  %iv112 = phi i64 [ %iv.next113, %for.body8 ], [ 0, %for.body8.preheader ]
+  %idx12 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  %0 = load i32, i32* %idx12
+  %add = add nsw i32 %0, 2
+  %idx18 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @b, i64 0, i64 %iv120, i64 %iv116, i64 %iv112
+  store i32 %add, i32* %idx18
+  %iv.next113 = add nuw nsw i64 %iv112, 1
+  %exitcond115 = icmp eq i64 %iv.next113, %wide.trip.count114
+  br i1 %exitcond115, label %for.cond.cleanup7.loopexit, label %for.body8
+
+for.cond31.preheader:
+  %iv108 = phi i64 [ %iv.next109, %for.cond.cleanup33 ], [ 0, %for.cond31.preheader.preheader ]
+  br i1 %cmp298, label %for.cond36.preheader.preheader, label %for.cond.cleanup33
+
+for.cond36.preheader.preheader:
+  br label %for.cond36.preheader
+
+for.cond.cleanup28.loopexit:
+  br label %for.cond.cleanup28
+
+for.cond.cleanup28:
+  ret i32 undef
+
+for.cond36.preheader:
+  %iv104 = phi i64 [ %iv.next105, %for.cond.cleanup38 ], [ 0, %for.cond36.preheader.preheader ]
+  br i1 %cmp696, label %for.body39.preheader, label %for.cond.cleanup38
+
+for.body39.preheader:
+  br label %for.body39
+
+for.cond.cleanup33.loopexit:
+  br label %for.cond.cleanup33
+
+for.cond.cleanup33:
+  %iv.next109 = add nuw nsw i64 %iv108, 1
+  %exitcond111 = icmp eq i64 %iv.next109, %wide.trip.count122
+  br i1 %exitcond111, label %for.cond.cleanup28.loopexit, label %for.cond31.preheader
+
+for.cond.cleanup38.loopexit:
+  br label %for.cond.cleanup38
+
+for.cond.cleanup38:
+  %iv.next105 = add nuw nsw i64 %iv104, 1
+  %exitcond107 = icmp eq i64 %iv.next105, %wide.trip.count118
+  br i1 %exitcond107, label %for.cond.cleanup33.loopexit, label %for.cond36.preheader
+
+for.body39:
+  %iv = phi i64 [ %iv.next, %for.body39 ], [ 0, %for.body39.preheader ]
+  %idx45 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @a, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  %1 = load i32, i32* %idx45
+  %mul = shl nsw i32 %1, 1
+  %idx51 = getelementptr inbounds [10 x [10 x [10 x i32]]], [10 x [10 x [10 x i32]]]* @c, i64 0, i64 %iv108, i64 %iv104, i64 %iv
+  store i32 %mul, i32* %idx51
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, %wide.trip.count114
+  br i1 %exitcond, label %for.cond.cleanup38.loopexit, label %for.body39
+}