Index: llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ llvm/trunk/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -1371,28 +1371,35 @@ DT.recalculate(F); + // We need to first add all the pre and post loop blocks into the loop + // structures (as part of createClonedLoopStructure), and then update the + // LCSSA form and LoopSimplifyForm. This is necessary for correctly updating + // LI when LoopSimplifyForm is generated. + Loop *PreL = nullptr, *PostL = nullptr; if (!PreLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PreL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map); - formLCSSARecursively(*L, DT, &LI, &SE); - simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Pre loops are slow paths, we do not need to perform any loop - // optimizations on them. - DisableAllLoopOptsOnLoop(*L); } if (!PostLoop.Blocks.empty()) { - auto *L = createClonedLoopStructure( + PostL = createClonedLoopStructure( &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map); + } + + // This function canonicalizes the loop into Loop-Simplify and LCSSA forms. + auto CanonicalizeLoop = [&] (Loop *L, bool IsOriginalLoop) { formLCSSARecursively(*L, DT, &LI, &SE); simplifyLoop(L, &DT, &LI, &SE, nullptr, true); - // Post loops are slow paths, we do not need to perform any loop + // Pre/post loops are slow paths, we do not need to perform any loop // optimizations on them. - DisableAllLoopOptsOnLoop(*L); - } - - formLCSSARecursively(OriginalLoop, DT, &LI, &SE); - simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true); + if (!IsOriginalLoop) + DisableAllLoopOptsOnLoop(*L); + }; + if (PreL) + CanonicalizeLoop(PreL, false); + if (PostL) + CanonicalizeLoop(PostL, false); + CanonicalizeLoop(&OriginalLoop, true); return true; } Index: llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll =================================================================== --- llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll +++ llvm/trunk/test/Transforms/IRCE/correct-loop-info.ll @@ -0,0 +1,182 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -irce < %s -S | FileCheck %s + +; REQUIRES: asserts + +; IRCE creates the pre and post loop, and invokes the +; canonicalizing these loops to LCSSA and loop-simplfy structure. Make sure that the update to the loopinfo does not +; incorrectly change the header while canonicalizing these pre/post loops. We +; were incorrectly updating LI when the split loop is a subloop as in the case below. +source_filename = "correct-loop-info.ll" + +define void @baz() personality i32* ()* @ham { +; CHECK-LABEL: @baz( +; CHECK-NEXT: bb: +; CHECK-NEXT: br label [[OUTERHEADER:%.*]] +; CHECK: outerheader: +; CHECK-NEXT: [[TMP:%.*]] = icmp slt i32 undef, 84 +; CHECK-NEXT: br i1 [[TMP]], label [[BB2:%.*]], label [[BB16:%.*]] +; CHECK: bb2: +; CHECK-NEXT: br i1 false, label [[INNERHEADER_PRELOOP_PREHEADER:%.*]], label [[PRELOOP_PSEUDO_EXIT:%.*]] +; CHECK: innerheader.preloop.preheader: +; CHECK-NEXT: br label [[INNERHEADER_PRELOOP:%.*]] +; CHECK: mainloop: +; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[INDVAR_END:%.*]], -1 +; CHECK-NEXT: br i1 [[TMP0]], label [[INNERHEADER_PREHEADER:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +; CHECK: innerheader.preheader: +; CHECK-NEXT: br label [[INNERHEADER:%.*]] +; CHECK: innerheader: +; CHECK-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP6:%.*]], [[BB8:%.*]] ], [ [[TMP4_PRELOOP_COPY:%.*]], [[INNERHEADER_PREHEADER]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit.split-lp +; CHECK: bb5: +; CHECK-NEXT: [[TMP6]] = add i32 [[TMP4]], 1 +; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[TMP6]], 0 +; CHECK-NEXT: br i1 true, label [[BB8]], label [[EXIT3_LOOPEXIT5:%.*]] +; CHECK: bb8: +; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 [[TMP6]], 84 +; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i32 [[TMP6]], -1 +; CHECK-NEXT: br i1 [[TMP1]], label [[INNERHEADER]], label [[MAIN_EXIT_SELECTOR:%.*]] +; CHECK: main.exit.selector: +; CHECK-NEXT: [[TMP6_LCSSA:%.*]] = phi i32 [ [[TMP6]], [[BB8]] ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 [[TMP6_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP2]], label [[MAIN_PSEUDO_EXIT]], label [[BB13:%.*]] +; CHECK: main.pseudo.exit: +; CHECK-NEXT: [[TMP4_COPY:%.*]] = phi i32 [ [[TMP4_PRELOOP_COPY]], [[MAINLOOP:%.*]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END1:%.*]] = phi i32 [ [[INDVAR_END]], [[MAINLOOP]] ], [ [[TMP6_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[POSTLOOP:%.*]] +; CHECK: outer_exiting.loopexit: +; CHECK-NEXT: [[LPAD_LOOPEXIT:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label [[OUTER_EXITING:%.*]] +; CHECK: outer_exiting.loopexit.split-lp.loopexit: +; CHECK-NEXT: [[LPAD_LOOPEXIT2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK: outer_exiting.loopexit.split-lp.loopexit.split-lp: +; CHECK-NEXT: %lpad.loopexit.split-lp3 = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %outer_exiting.loopexit.split-lp +; CHECK: outer_exiting.loopexit.split-lp: +; CHECK-NEXT: br label [[OUTER_EXITING]] +; CHECK: outer_exiting: +; CHECK-NEXT: switch i32 undef, label [[EXIT2:%.*]] [ +; CHECK-NEXT: i32 142, label [[BB14:%.*]] +; CHECK-NEXT: i32 448, label [[EXIT:%.*]] +; CHECK-NEXT: ] +; CHECK: exit3.loopexit: +; CHECK-NEXT: br label [[EXIT3:%.*]] +; CHECK: exit3.loopexit4: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3.loopexit5: +; CHECK-NEXT: br label [[EXIT3]] +; CHECK: exit3: +; CHECK-NEXT: ret void +; CHECK: bb13.loopexit: +; CHECK-NEXT: br label [[BB13]] +; CHECK: bb13: +; CHECK-NEXT: unreachable +; CHECK: bb14: +; CHECK-NEXT: br label [[OUTERHEADER]] +; CHECK: exit: +; CHECK-NEXT: ret void +; CHECK: bb16: +; CHECK-NEXT: ret void +; CHECK: exit2: +; CHECK-NEXT: ret void +; CHECK: innerheader.preloop: +; CHECK-NEXT: [[TMP4_PRELOOP:%.*]] = phi i32 [ [[TMP6_PRELOOP:%.*]], [[BB8_PRELOOP:%.*]] ], [ undef, [[INNERHEADER_PRELOOP_PREHEADER]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5_PRELOOP:%.*]] unwind label [[OUTER_EXITING_LOOPEXIT:%.*]] +; CHECK: bb5.preloop: +; CHECK-NEXT: [[TMP6_PRELOOP]] = add i32 [[TMP4_PRELOOP]], 1 +; CHECK-NEXT: [[TMP7_PRELOOP:%.*]] = icmp ult i32 [[TMP6_PRELOOP]], 0 +; CHECK-NEXT: br i1 [[TMP7_PRELOOP]], label [[BB8_PRELOOP]], label [[EXIT3_LOOPEXIT:%.*]] +; CHECK: bb8.preloop: +; CHECK-NEXT: [[TMP9_PRELOOP:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], 84 +; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP6_PRELOOP]], -1 +; CHECK-NEXT: br i1 [[TMP3]], label [[INNERHEADER_PRELOOP]], label [[PRELOOP_EXIT_SELECTOR:%.*]], !llvm.loop !0, !irce.loop.clone !5 +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[TMP6_PRELOOP_LCSSA:%.*]] = phi i32 [ [[TMP6_PRELOOP]], [[BB8_PRELOOP]] ] +; CHECK-NEXT: [[TMP4:%.*]] = icmp slt i32 [[TMP6_PRELOOP_LCSSA]], 84 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRELOOP_PSEUDO_EXIT]], label [[BB13]] +; CHECK: preloop.pseudo.exit: +; CHECK-NEXT: [[TMP4_PRELOOP_COPY]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: [[INDVAR_END]] = phi i32 [ undef, [[BB2]] ], [ [[TMP6_PRELOOP_LCSSA]], [[PRELOOP_EXIT_SELECTOR]] ] +; CHECK-NEXT: br label [[MAINLOOP]] +; CHECK: postloop: +; CHECK-NEXT: br label [[INNERHEADER_POSTLOOP:%.*]] +; CHECK: innerheader.postloop: +; CHECK-NEXT: [[TMP4_POSTLOOP:%.*]] = phi i32 [ [[TMP6_POSTLOOP:%.*]], [[BB8_POSTLOOP:%.*]] ], [ [[TMP4_COPY]], [[POSTLOOP]] ] +; CHECK-NEXT: invoke void @pluto() +; CHECK-NEXT: to label [[BB5_POSTLOOP:%.*]] unwind label %outer_exiting.loopexit.split-lp.loopexit +; CHECK: bb5.postloop: +; CHECK-NEXT: [[TMP6_POSTLOOP]] = add i32 [[TMP4_POSTLOOP]], 1 +; CHECK-NEXT: [[TMP7_POSTLOOP:%.*]] = icmp ult i32 [[TMP6_POSTLOOP]], 0 +; CHECK-NEXT: br i1 [[TMP7_POSTLOOP]], label [[BB8_POSTLOOP]], label [[EXIT3_LOOPEXIT4:%.*]] +; CHECK: bb8.postloop: +; CHECK-NEXT: [[TMP9_POSTLOOP:%.*]] = icmp slt i32 [[TMP6_POSTLOOP]], 84 +; CHECK-NEXT: br i1 [[TMP9_POSTLOOP]], label [[INNERHEADER_POSTLOOP]], label [[BB13_LOOPEXIT:%.*]], !llvm.loop !6, !irce.loop.clone !5 +; +bb: + br label %outerheader + +outerheader: ; preds = %bb14, %bb + %tmp = icmp slt i32 undef, 84 + br i1 %tmp, label %bb2, label %bb16 + +bb2: ; preds = %outerheader + br label %innerheader + +innerheader: ; preds = %bb8, %bb2 + %tmp4 = phi i32 [ %tmp6, %bb8 ], [ undef, %bb2 ] + invoke void @pluto() + to label %bb5 unwind label %outer_exiting + +bb5: ; preds = %innerheader + %tmp6 = add i32 %tmp4, 1 + %tmp7 = icmp ult i32 %tmp6, 0 + br i1 %tmp7, label %bb8, label %exit3 + +bb8: ; preds = %bb5 + %tmp9 = icmp slt i32 %tmp6, 84 + br i1 %tmp9, label %innerheader, label %bb13 + +outer_exiting: ; preds = %innerheader + %tmp11 = landingpad { i8*, i32 } + cleanup + switch i32 undef, label %exit2 [ + i32 142, label %bb14 + i32 448, label %exit + ] + +exit3: ; preds = %bb5 + ret void + +bb13: ; preds = %bb8 + unreachable + +bb14: ; preds = %outer_exiting + br label %outerheader + +exit: ; preds = %outer_exiting + ret void + +bb16: ; preds = %outerheader + ret void + +exit2: ; preds = %outer_exiting + ret void +} + +declare i32* @ham() + +declare void @pluto() + +!0 = distinct !{!0, !1, !2, !3, !4} +!1 = !{!"llvm.loop.unroll.disable"} +!2 = !{!"llvm.loop.vectorize.enable", i1 false} +!3 = !{!"llvm.loop.licm_versioning.disable"} +!4 = !{!"llvm.loop.distribute.enable", i1 false} +!5 = !{} +!6 = distinct !{!6, !1, !2, !3, !4}