Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -460,10 +460,20 @@ // equivalent to: // // intN_ty inc = IndVarIncreasing ? 1 : -1; - // pred_ty predicate = IndVarIncreasing ? ICMP_SLT : ICMP_SGT; + // pred_ty predicate = IndVarIncreasing + // ? IsSignedPredicate ? ICMP_SLT : ICMP_ULT + // : IsSignedPredicate ? ICMP_SGT : ICMP_UGT; // - // for (intN_ty iv = IndVarStart; predicate(iv, LoopExitAt); iv = IndVarBase) + // + // for (intN_ty iv = IndVarStart; predicate(IndVarBase, LoopExitAt); + // iv = IndVarNext) // ... body ... + // + // Here IndVarBase is either current or next value of the induction variable. + // in the former case, IsIndVarNext = false and IndVarBase points to the + // Phi node of the induction variable. Otherwise, IsIndVarNext = true and + // IndVarBase points to IV increment instruction. + // Value *IndVarBase; Value *IndVarStart; @@ -471,12 +481,13 @@ Value *LoopExitAt; bool IndVarIncreasing; bool IsSignedPredicate; + bool IsIndVarNext; LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), LatchExit(nullptr), LatchBrExitIdx(-1), IndVarBase(nullptr), IndVarStart(nullptr), IndVarStep(nullptr), LoopExitAt(nullptr), - IndVarIncreasing(false), IsSignedPredicate(true) {} + IndVarIncreasing(false), IsSignedPredicate(true), IsIndVarNext(false) {} template LoopStructure map(M Map) const { LoopStructure Result; @@ -492,6 +503,7 @@ Result.LoopExitAt = Map(LoopExitAt); Result.IndVarIncreasing = IndVarIncreasing; Result.IsSignedPredicate = IsSignedPredicate; + Result.IsIndVarNext = IsIndVarNext; return Result; } @@ -839,21 +851,42 @@ return false; }; - // `ICI` is interpreted as taking the backedge if the *next* value of the - // induction variable satisfies some constraint. + // `ICI` can either be a comparison against IV or a comparison of IV.next. + // Depending on the interpretation, we calculate the start value differently. + // Pair {IndVarBase; IsIndVarNext} semantically designates whether the latch + // comparisons happens against the IV before or after its value is + // incremented. Two valid combinations for them are: + // + // 1) { phi [ iv.start, preheader ], [ iv.next, latch ]; false }, + // 2) { iv.next; true }. + // + // The latch comparison happens against IndVarBase which can be either current + // or next value of the induction variable. const SCEVAddRecExpr *IndVarBase = cast(LeftSCEV); bool IsIncreasing = false; bool IsSignedPredicate = true; + bool IsIndVarNext = false; ConstantInt *StepCI; if (!IsInductionVar(IndVarBase, IsIncreasing, StepCI)) { FailureReason = "LHS in icmp not induction variable"; return None; } - const SCEV *StartNext = IndVarBase->getStart(); - const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); - const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); + const SCEV *IndVarStart = nullptr; + // TODO: Currently we only handle comparison against IV, but we can extend + // this analysis to be able to deal with comparison against sext(iv) and such. + if (isa(LeftValue) && + cast(LeftValue)->getParent() == Header) + // The comparison is made against current IV value. + IndVarStart = IndVarBase->getStart(); + else { + // Assume that the comparison is made against next IV value. + const SCEV *StartNext = IndVarBase->getStart(); + const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); + IndVarStart = SE.getAddExpr(StartNext, Addend); + IsIndVarNext = true; + } const SCEV *Step = SE.getSCEV(StepCI); ConstantInt *One = ConstantInt::get(IndVarTy, 1); @@ -1037,6 +1070,7 @@ Result.IndVarIncreasing = IsIncreasing; Result.LoopExitAt = RightValue; Result.IsSignedPredicate = IsSignedPredicate; + Result.IsIndVarNext = IsIndVarNext; FailureReason = nullptr; @@ -1065,13 +1099,14 @@ // takes. const SCEV *Smallest = nullptr, *Greatest = nullptr, *GreatestSeen = nullptr; + const bool IsIndVarNext = MainLoopStructure.IsIndVarNext; const SCEV *One = SE.getOne(Ty); if (Increasing) { Smallest = Start; - Greatest = End; + Greatest = IsIndVarNext ? End : SE.getAddExpr(End, One); // No overflow, because the range [Smallest, GreatestSeen] is not empty. - GreatestSeen = SE.getMinusSCEV(End, One); + GreatestSeen = IsIndVarNext ? SE.getMinusSCEV(End, One) : End; } else { // These two computations may sign-overflow. Here is why that is okay: // @@ -1089,7 +1124,7 @@ // will be an empty range. Returning an empty range is always safe. // - Smallest = SE.getAddExpr(End, One); + Smallest = IsIndVarNext ? SE.getAddExpr(End, One) : End; Greatest = SE.getAddExpr(Start, One); GreatestSeen = Start; } @@ -1326,8 +1361,8 @@ BranchToContinuation); NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); - NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), - RRI.ExitSelector); + auto *FixupValue = PN->getIncomingValueForBlock(LS.Latch); + NewPHI->addIncoming(FixupValue, RRI.ExitSelector); RRI.PHIValuesAtPseudoExit.push_back(NewPHI); } @@ -1468,7 +1503,6 @@ } ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); } - ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt); ExitPreLoopAt->setName("exit.preloop.at"); } @@ -1487,7 +1521,9 @@ } ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); } - + if (!MainLoopStructure.IsIndVarNext) + ExitMainLoopAtSCEV = SE.getMinusSCEV( + ExitMainLoopAtSCEV, SE.getSCEV(MainLoopStructure.IndVarStep)); ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt); ExitMainLoopAt->setName("exit.mainloop.at"); } @@ -1743,7 +1779,10 @@ } LoopStructure LS = MaybeLoopStructure.getValue(); const SCEVAddRecExpr *IndVar = - cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep))); + cast(SE.getSCEV(LS.IndVarBase)); + if (LS.IsIndVarNext) + IndVar = cast(SE.getMinusSCEV(IndVar, + SE.getSCEV(LS.IndVarStep))); Optional SafeIterRange; Instruction *ExprInsertPt = Preheader->getTerminator(); Index: test/Transforms/IRCE/latch-comparison-against-current-value.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/latch-comparison-against-current-value.ll @@ -0,0 +1,527 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; Check that IRCE is able to deal with loops where the latch comparison is +; done against current value of the IV, not the IV.next. + +; CHECK-LABEL: irce: in function test_01: constrained Loop at depth 1 containing +; CHECK-LABEL: irce: in function test_02: constrained Loop at depth 1 containing +; CHECK-NOT: irce: in function test_03: constrained Loop +; CHECK-NOT: irce: in function test_04: constrained Loop +; CHECK-LABEL: irce: in function test_05: constrained Loop at depth 1 containing +; CHECK-LABEL: irce: in function test_06: constrained Loop at depth 1 containing +; CHECK-LABEL: irce: in function test_07: constrained Loop at depth 1 containing +; CHECK-LABEL: irce: in function test_08: constrained Loop at depth 1 containing +; CHECK-LABEL: irce: in function test_09: constrained Loop at depth 1 containing + +; SLT condition for increasing loop from 0 to 100. +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_01 +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: %exit.mainloop.at = add i32 %len, -1 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ] +; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1 +; CHECK-NEXT: %abc = icmp slt i32 %idx, %len +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp slt i32 %idx, 100 +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ] +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp slt i32 %idx.lcssa, 100 +; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit +; CHECK-NOT: loop.preloop: +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp slt i32 %idx.postloop, %len +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp slt i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; ULT condition for increasing loop from 0 to 100. +define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_02 +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: %exit.mainloop.at = add i32 %len, -1 +; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND2]], label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: +; CHECK-NEXT: %idx = phi i32 [ %idx.next, %in.bounds ], [ 0, %loop.preheader ] +; CHECK-NEXT: %idx.next = add nuw nsw i32 %idx, 1 +; CHECK-NEXT: %abc = icmp ult i32 %idx, %len +; CHECK-NEXT: br i1 true, label %in.bounds, label %out.of.bounds.loopexit1 +; CHECK: in.bounds: +; CHECK-NEXT: %addr = getelementptr i32, i32* %arr, i32 %idx +; CHECK-NEXT: store i32 0, i32* %addr +; CHECK-NEXT: %next = icmp ult i32 %idx, 100 +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp ult i32 %idx, %exit.mainloop.at +; CHECK-NEXT: br i1 [[COND3]], label %loop, label %main.exit.selector +; CHECK: main.exit.selector: +; CHECK-NEXT: %idx.lcssa = phi i32 [ %idx, %in.bounds ] +; CHECK-NEXT: %idx.next.lcssa = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND4:%[^ ]+]] = icmp ult i32 %idx.lcssa, 100 +; CHECK-NEXT: br i1 [[COND4]], label %main.pseudo.exit, label %exit +; CHECK-NOT: loop.preloop: +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add nuw nsw i32 %idx.postloop, 1 +; CHECK-NEXT: %abc.postloop = icmp ult i32 %idx.postloop, %len +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Same as test_01, but comparison happens against IV extended to a wider type. +; This test ensures that IRCE rejects it and does not falsely assume that it was +; a comparison against iv.next. +; TODO: We can actually extend the recognition to cover this case. +define void @test_03(i32* %arr, i64* %a_len_ptr) #0 { + +; CHECK-LABEL: test_03 + +entry: + %len = load i64, i64* %a_len_ptr, !range !1 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %idx.ext = sext i32 %idx to i64 + %abc = icmp slt i64 %idx.ext, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Same as test_02, but comparison happens against IV extended to a wider type. +; This test ensures that IRCE rejects it and does not falsely assume that it was +; a comparison against iv.next. +; TODO: We can actually extend the recognition to cover this case. +define void @test_04(i32* %arr, i64* %a_len_ptr) #0 { + +; CHECK-LABEL: test_04 + +entry: + %len = load i64, i64* %a_len_ptr, !range !1 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %idx.ext = sext i32 %idx to i64 + %abc = icmp ult i64 %idx.ext, %len + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK-LABEL: test_05 +; CHECK: entry: +; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +; CHECK-NEXT: %exit.mainloop.at = add i32 %len, -1 +; CHECK-NEXT: br i1 true, label %loop.preloop.preheader, label %preloop.pseudo.exit +; CHECK: loop.preloop: +; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ -10, %loop.preloop.preheader ] +; CHECK-NEXT: %idx.next.preloop = add i32 %idx.preloop, 1 +; CHECK-NEXT: %c1.preloop = icmp sge i32 %idx.preloop, 0 +; CHECK-NEXT: %c2.preloop = icmp slt i32 %idx.preloop, %len +; CHECK-NEXT: %abc.preloop = and i1 %c1.preloop, %c2.preloop +; CHECK-NEXT: br i1 %abc.preloop, label %in.bounds.preloop, label %out.of.bounds.loopexit +; CHECK: in.bounds.preloop: +; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +; CHECK-NEXT: store i32 0, i32* %addr.preloop +; CHECK-NEXT: %next.preloop = icmp slt i32 %idx.preloop, 100 +; CHECK-NEXT: [[COND3:%[^ ]+]] = icmp slt i32 %idx.preloop, 0 +; CHECK-NEXT: br i1 [[COND3]], label %loop.preloop, label %preloop.exit.selector +; CHECK: loop.postloop: +; CHECK-NEXT: %idx.postloop = phi i32 [ %idx.copy, %postloop ], [ %idx.next.postloop, %in.bounds.postloop ] +; CHECK-NEXT: %idx.next.postloop = add i32 %idx.postloop, 1 +; CHECK-NEXT: %c1.postloop = icmp sge i32 %idx.postloop, 0 +; CHECK-NEXT: %c2.postloop = icmp slt i32 %idx.postloop, %len +; CHECK-NEXT: %abc.postloop = and i1 %c1.postloop, %c2.postloop +; CHECK-NEXT: br i1 %abc.postloop, label %in.bounds.postloop, label %out.of.bounds.loopexit2 +; CHECK: in.bounds.postloop: +; CHECK-NEXT: %addr.postloop = getelementptr i32, i32* %arr, i32 %idx.postloop +; CHECK-NEXT: store i32 0, i32* %addr.postloop +; CHECK-NEXT: %next.postloop = icmp slt i32 %idx.postloop, 100 +; CHECK-NEXT: br i1 %next.postloop, label %loop.postloop, label %exit.loopexit + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [-10, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, 1 + %c1 = icmp sge i32 %idx, 0 + %c2 = icmp slt i32 %idx, %len + %abc = and i1 %c1, %c2 + br i1 %abc, label %in.bounds, label %out.of.bounds + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp slt i32 %idx, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Make sure that we insert a postloop in the situation when we should exit by +; the range check on the last iteration. Unsigned case. + +define void @test_06() #0 { + +; CHECK-LABEL: test_06 +; CHECK: entry: +; CHECK-NEXT: br i1 true, label %loop.preheader +; CHECK-NOT: preloop +; CHECK: loop.preheader: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %IV = phi i64 [ %IV.next, %latch ], [ 5, %loop.preheader ] +; CHECK-NEXT: %range_check = icmp ult i64 %IV, 83 +; CHECK-NEXT: br i1 true, label %inner_loop_preheader, label %out_of_bounds.loopexit1 +; CHECK: inner_loop: +; CHECK-NEXT: %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] +; CHECK-NEXT: %IV.inner.next = add nsw i32 %IV.inner, -3 +; CHECK-NEXT: %tmp3 = zext i32 %IV.inner.next to i64 +; CHECK-NEXT: %tmp4 = icmp ult i64 %IV, %tmp3 +; CHECK-NEXT: br i1 %tmp4, label %inner_loop, label %latch +; CHECK: latch: +; CHECK-NEXT: %IV.next = add nuw i64 %IV, 1 +; CHECK-NEXT: %tmp5 = icmp ugt i64 %IV, 82 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ult i64 %IV, 82 +; CHECK-NEXT: [[COND2:%[^ ]+]] = xor i1 [[COND1]], true +; CHECK-NEXT: br i1 [[COND2]], label %main.exit.selector, label %loop +; CHECK: main.pseudo.exit: +; CHECK-NEXT: %IV.copy = phi i64 [ 5, %entry ], [ %IV.next.lcssa, %main.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i64 [ 5, %entry ], [ %IV.lcssa, %main.exit.selector ] +; CHECK-NEXT: br label %postloop +; CHECK: loop.postloop: +; CHECK-NEXT: %IV.postloop = phi i64 [ %IV.copy, %postloop ], [ %IV.next.postloop, %latch.postloop ] +; CHECK-NEXT: %range_check.postloop = icmp ult i64 %IV.postloop, 83 +; CHECK-NEXT: br i1 %range_check.postloop, label %inner_loop_preheader.postloop, label %out_of_bounds.loopexit +; CHECK: latch.postloop: +; CHECK-NEXT: %IV.next.postloop = add nuw i64 %IV.postloop, 1 +; CHECK-NEXT: %tmp5.postloop = icmp ugt i64 %IV.postloop, 82 +; CHECK-NEXT: br i1 %tmp5.postloop, label %exit.loopexit, label %loop.postloop + +entry: + br label %loop + +loop: + %IV = phi i64 [ 5, %entry ], [ %IV.next, %latch ] + %range_check = icmp ult i64 %IV, 83 + br i1 %range_check, label %inner_loop_preheader, label %out_of_bounds + +inner_loop_preheader: + br label %inner_loop + +inner_loop: + %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] + %IV.inner.next = add nsw i32 %IV.inner, -3 + %tmp3 = zext i32 %IV.inner.next to i64 + %tmp4 = icmp ult i64 %IV, %tmp3 + br i1 %tmp4, label %inner_loop, label %latch + +latch: + %IV.next = add nuw i64 %IV, 1 + %tmp5 = icmp ugt i64 %IV, 82 + br i1 %tmp5, label %exit, label %loop + +exit: + ret void + +out_of_bounds: + ret void +} + +; Make sure that we insert a postloop in the situation when we should exit by +; the range check on the last iteration. Signed case. + +define void @test_07() #0 { + +; CHECK-LABEL: test_07 +; CHECK: entry: +; CHECK-NEXT: br i1 true, label %loop.preheader +; CHECK-NOT: preloop +; CHECK: loop.preheader: +; CHECK-NEXT: br label %loop +; CHECK: loop: +; CHECK-NEXT: %IV = phi i64 [ %IV.next, %latch ], [ 5, %loop.preheader ] +; CHECK-NEXT: %range_check = icmp slt i64 %IV, 83 +; CHECK-NEXT: br i1 true, label %inner_loop_preheader, label %out_of_bounds.loopexit1 +; CHECK: inner_loop: +; CHECK-NEXT: %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] +; CHECK-NEXT: %IV.inner.next = add nsw i32 %IV.inner, -3 +; CHECK-NEXT: %tmp3 = zext i32 %IV.inner.next to i64 +; CHECK-NEXT: %tmp4 = icmp slt i64 %IV, %tmp3 +; CHECK-NEXT: br i1 %tmp4, label %inner_loop, label %latch +; CHECK: latch: +; CHECK-NEXT: %IV.next = add nsw i64 %IV, 1 +; CHECK-NEXT: %tmp5 = icmp sgt i64 %IV, 82 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp slt i64 %IV, 82 +; CHECK-NEXT: [[COND2:%[^ ]+]] = xor i1 [[COND1]], true +; CHECK-NEXT: br i1 [[COND2]], label %main.exit.selector, label %loop +; CHECK: main.pseudo.exit: +; CHECK-NEXT: %IV.copy = phi i64 [ 5, %entry ], [ %IV.next.lcssa, %main.exit.selector ] +; CHECK-NEXT: %indvar.end = phi i64 [ 5, %entry ], [ %IV.lcssa, %main.exit.selector ] +; CHECK-NEXT: br label %postloop +; CHECK: loop.postloop: +; CHECK-NEXT: %IV.postloop = phi i64 [ %IV.copy, %postloop ], [ %IV.next.postloop, %latch.postloop ] +; CHECK-NEXT: %range_check.postloop = icmp slt i64 %IV.postloop, 83 +; CHECK-NEXT: br i1 %range_check.postloop, label %inner_loop_preheader.postloop, label %out_of_bounds.loopexit +; CHECK: latch.postloop: +; CHECK-NEXT: %IV.next.postloop = add nsw i64 %IV.postloop, 1 +; CHECK-NEXT: %tmp5.postloop = icmp sgt i64 %IV.postloop, 82 +; CHECK-NEXT: br i1 %tmp5.postloop, label %exit.loopexit, label %loop.postloop + +entry: + br label %loop + +loop: + %IV = phi i64 [ 5, %entry ], [ %IV.next, %latch ] + %range_check = icmp slt i64 %IV, 83 + br i1 %range_check, label %inner_loop_preheader, label %out_of_bounds + +inner_loop_preheader: + br label %inner_loop + +inner_loop: + %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] + %IV.inner.next = add nsw i32 %IV.inner, -3 + %tmp3 = zext i32 %IV.inner.next to i64 + %tmp4 = icmp slt i64 %IV, %tmp3 + br i1 %tmp4, label %inner_loop, label %latch + +latch: + %IV.next = add nsw i64 %IV, 1 + %tmp5 = icmp sgt i64 %IV, 82 + br i1 %tmp5, label %exit, label %loop + +exit: + ret void + +out_of_bounds: + ret void +} + +; Make sure that we insert a postloop in the situation when we should exit by +; the range check on the last iteration in decrementing loop. Unsigned case. + +define void @test_08() #0 { + +; CHECK-LABEL: test_08 +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: br i1 true, label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: ; preds = %loop.preheader, %latch +; CHECK-NEXT: %IV = phi i64 [ %IV.next, %latch ], [ 83, %loop.preheader ] +; CHECK-NEXT: %IV.offset = add i64 %IV, -5 +; CHECK-NEXT: %range_check = icmp ult i64 %IV.offset, 200 +; CHECK-NEXT: br i1 true, label %inner_loop_preheader, label %out_of_bounds.loopexit1 +; CHECK: latch: ; preds = %inner_loop +; CHECK-NEXT: %IV.next = add i64 %IV, -1 +; CHECK-NEXT: %tmp5 = icmp ult i64 %IV, 5 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i64 %IV, 5 +; CHECK-NEXT: [[COND2:%[^ ]+]] = xor i1 [[COND1]], true +; CHECK-NEXT: br i1 [[COND2]], label %main.exit.selector, label %loop +; CHECK: main.exit.selector: ; preds = %latch +; CHECK-NEXT: %IV.next.lcssa = phi i64 [ %IV.next, %latch ] +; CHECK-NEXT: %IV.lcssa = phi i64 [ %IV, %latch ] +; CHECK-NEXT: %2 = icmp ugt i64 %IV.lcssa, 4 +; CHECK-NEXT: br i1 %2, label %main.pseudo.exit, label %exit +; CHECK: loop.postloop: +; CHECK-NEXT: %IV.postloop = phi i64 [ %IV.copy, %postloop ], [ %IV.next.postloop, %latch.postloop ] +; CHECK-NEXT: %IV.offset.postloop = add i64 %IV.postloop, -5 +; CHECK-NEXT: %range_check.postloop = icmp ult i64 %IV.offset.postloop, 200 +; CHECK-NEXT: br i1 %range_check.postloop, label %inner_loop_preheader.postloop, label %out_of_bounds.loopexit +; CHECK: latch.postloop: +; CHECK-NEXT: %IV.next.postloop = add i64 %IV.postloop, -1 +; CHECK-NEXT: %tmp5.postloop = icmp ult i64 %IV.postloop, 5 +; CHECK-NEXT: br i1 %tmp5.postloop, label %exit.loopexit, label %loop.postloop + +entry: + br label %loop + +loop: ; preds = %latch, %entry + %IV = phi i64 [ 83, %entry ], [ %IV.next, %latch ] + %IV.offset = add i64 %IV, -5 + %range_check = icmp ult i64 %IV.offset, 200 + br i1 %range_check, label %inner_loop_preheader, label %out_of_bounds + +inner_loop_preheader: ; preds = %loop + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %inner_loop_preheader + %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] + %IV.inner.next = add nsw i32 %IV.inner, -3 + %tmp3 = zext i32 %IV.inner.next to i64 + %tmp4 = icmp ult i64 %IV, %tmp3 + br i1 %tmp4, label %inner_loop, label %latch + +latch: ; preds = %inner_loop + %IV.next = add i64 %IV, -1 + %tmp5 = icmp ult i64 %IV, 5 + br i1 %tmp5, label %exit, label %loop + +exit: ; preds = %latch, %loop + ret void + +out_of_bounds: + ret void +} + +; Make sure that we insert a postloop in the situation when we should exit by +; the range check on the last iteration in decrementing loop. Signed case. + +define void @test_09() #0 { + +; CHECK-LABEL: test_09 +; CHECK-NOT: preloop +; CHECK: entry: +; CHECK-NEXT: br i1 true, label %loop.preheader, label %main.pseudo.exit +; CHECK: loop: ; preds = %loop.preheader, %latch +; CHECK-NEXT: %IV = phi i64 [ %IV.next, %latch ], [ 83, %loop.preheader ] +; CHECK-NEXT: %IV.offset = add i64 %IV, -5 +; CHECK-NEXT: %rc1 = icmp slt i64 %IV.offset, 200 +; CHECK-NEXT: %rc2 = icmp sge i64 %IV.offset, 0 +; CHECK-NEXT: %range_check = and i1 %rc1, %rc2 +; CHECK-NEXT: br i1 true, label %inner_loop_preheader, label %out_of_bounds.loopexit1 +; CHECK: latch: ; preds = %inner_loop +; CHECK-NEXT: %IV.next = add i64 %IV, -1 +; CHECK-NEXT: %tmp5 = icmp slt i64 %IV, 5 +; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp sgt i64 %IV, 5 +; CHECK-NEXT: [[COND2:%[^ ]+]] = xor i1 [[COND1]], true +; CHECK-NEXT: br i1 [[COND2]], label %main.exit.selector, label %loop +; CHECK: main.exit.selector: ; preds = %latch +; CHECK-NEXT: %IV.next.lcssa = phi i64 [ %IV.next, %latch ] +; CHECK-NEXT: %IV.lcssa = phi i64 [ %IV, %latch ] +; CHECK-NEXT: %2 = icmp sgt i64 %IV.lcssa, 4 +; CHECK-NEXT: br i1 %2, label %main.pseudo.exit, label %exit +; CHECK: loop.postloop: ; preds = %postloop, %latch.postloop +; CHECK-NEXT: %IV.postloop = phi i64 [ %IV.copy, %postloop ], [ %IV.next.postloop, %latch.postloop ] +; CHECK-NEXT: %IV.offset.postloop = add i64 %IV.postloop, -5 +; CHECK-NEXT: %rc1.postloop = icmp slt i64 %IV.offset.postloop, 200 +; CHECK-NEXT: %rc2.postloop = icmp sge i64 %IV.offset.postloop, 0 +; CHECK-NEXT: %range_check.postloop = and i1 %rc1.postloop, %rc2.postloop +; CHECK-NEXT: br i1 %range_check.postloop, label %inner_loop_preheader.postloop, label %out_of_bounds.loopexit +; CHECK: latch.postloop: ; preds = %inner_loop.postloop +; CHECK-NEXT: %IV.next.postloop = add i64 %IV.postloop, -1 +; CHECK-NEXT: %tmp5.postloop = icmp slt i64 %IV.postloop, 5 +; CHECK-NEXT: br i1 %tmp5.postloop, label %exit.loopexit, label %loop.postloop + +entry: + br label %loop + +loop: ; preds = %latch, %entry + %IV = phi i64 [ 83, %entry ], [ %IV.next, %latch ] + %IV.offset = add i64 %IV, -5 + %rc1 = icmp slt i64 %IV.offset, 200 + %rc2 = icmp sge i64 %IV.offset, 0 + %range_check = and i1 %rc1, %rc2 + br i1 %range_check, label %inner_loop_preheader, label %out_of_bounds + +inner_loop_preheader: ; preds = %loop + br label %inner_loop + +inner_loop: ; preds = %inner_loop, %inner_loop_preheader + %IV.inner = phi i32 [ %IV.inner.next, %inner_loop ], [ 83, %inner_loop_preheader ] + %IV.inner.next = add nsw i32 %IV.inner, -3 + %tmp3 = zext i32 %IV.inner.next to i64 + %tmp4 = icmp slt i64 %IV, %tmp3 + br i1 %tmp4, label %inner_loop, label %latch + +latch: ; preds = %inner_loop + %IV.next = add i64 %IV, -1 + %tmp5 = icmp slt i64 %IV, 5 + br i1 %tmp5, label %exit, label %loop + +exit: ; preds = %latch, %loop + ret void + +out_of_bounds: + ret void +} + +!0 = !{i32 0, i32 50} +!1 = !{i64 0, i64 50}