Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -459,11 +459,13 @@ Value *IndVarStart; Value *LoopExitAt; bool IndVarIncreasing; + bool SignedPredicate; LoopStructure() : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr), - IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {} + IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false), + SignedPredicate(true) {} template LoopStructure map(M Map) const { LoopStructure Result; @@ -477,6 +479,7 @@ Result.IndVarStart = Map(IndVarStart); Result.LoopExitAt = Map(LoopExitAt); Result.IndVarIncreasing = IndVarIncreasing; + Result.SignedPredicate = SignedPredicate; return Result; } @@ -542,7 +545,7 @@ // intersection of `Range' and the iteration space of the original loop. // Return None if unable to compute the set of subranges. // - Optional calculateSubRanges() const; + Optional calculateSubRanges(bool SignedPredicate) const; // Clone `OriginalLoop' and return the result in CLResult. The IR after // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- @@ -649,22 +652,25 @@ PN->setIncomingBlock(i, ReplaceBy); } -static bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) { - APInt SMax = - APInt::getSignedMaxValue(cast(S->getType())->getBitWidth()); - return SE.getSignedRange(S).contains(SMax) && - SE.getUnsignedRange(S).contains(SMax); +static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) { + APInt Max = Signed ? + APInt::getSignedMaxValue(cast(S->getType())->getBitWidth()) : + APInt::getMaxValue(cast(S->getType())->getBitWidth()); + return SE.getSignedRange(S).contains(Max) && + SE.getUnsignedRange(S).contains(Max); } -static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) { - APInt SMin = - APInt::getSignedMinValue(cast(S->getType())->getBitWidth()); - return SE.getSignedRange(S).contains(SMin) && - SE.getUnsignedRange(S).contains(SMin); +static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { + APInt Min = Signed ? + APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : + APInt::getMinValue(cast(S->getType())->getBitWidth()); + return SE.getSignedRange(S).contains(Min) && + SE.getUnsignedRange(S).contains(Min); } Optional -LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI, +LoopStructure::parseLoopStructure(ScalarEvolution &SE, + BranchProbabilityInfo &BPI, Loop &L, const char *&FailureReason) { if (!L.isLoopSimplifyForm()) { FailureReason = "loop not in LoopSimplify form"; @@ -793,6 +799,7 @@ const SCEVAddRecExpr *IndVarNext = cast(LeftSCEV); bool IsIncreasing = false; + bool SignedPredicate = true; if (!IsInductionVar(IndVarNext, IsIncreasing)) { FailureReason = "LHS in icmp not induction variable"; return None; @@ -803,7 +810,6 @@ const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); ConstantInt *One = ConstantInt::get(IndVarTy, 1); - // TODO: generalize the predicates here to also match their unsigned variants. if (IsIncreasing) { bool DecreasedRightValueByOne = false; // Try to turn eq/ne predicates to those we can work with. @@ -811,38 +817,45 @@ // while (++i != len) { while (++i < len) { // ... ---> ... // } } + // TODO: Insert ICMP_ULT if both are non-negative? Pred = ICmpInst::ICMP_SLT; else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeSMin(SE, RightSCEV)) { + !CanBeMin(SE, RightSCEV, /* SignedPredicate */ true)) { // while (true) { while (true) { // if (++i == len) ---> if (++i > len - 1) // break; break; // ... ... // } } + // TODO: Insert ICMP_UGT if both are non-negative? Pred = ICmpInst::ICMP_SGT; RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); DecreasedRightValueByOne = true; } + bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); + bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); bool FoundExpectedPred = - (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || - (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); + (LTPred && LatchBrExitIdx == 1) || (GTPred && LatchBrExitIdx == 0); if (!FoundExpectedPred) { FailureReason = "expected icmp slt semantically, found something else"; return None; } + SignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; + ICmpInst::Predicate GuardPred = + SignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + if (LatchBrExitIdx == 0) { - if (CanBeSMax(SE, RightSCEV)) { + if (CanBeMax(SE, RightSCEV, SignedPredicate)) { // TODO: this restriction is easily removable -- we just have to // remember that the icmp was an slt and not an sle. - FailureReason = "limit may overflow when coercing sle to slt"; + FailureReason = "limit may overflow when coercing le to lt"; return None; } if (!SE.isLoopEntryGuardedByCond( - &L, CmpInst::ICMP_SLT, IndVarStart, + &L, GuardPred, IndVarStart, SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())))) { FailureReason = "Induction variable start not bounded by upper limit"; return None; @@ -855,8 +868,7 @@ RightValue = B.CreateAdd(RightValue, One); } } else { - if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SLT, IndVarStart, - RightSCEV)) { + if (!SE.isLoopEntryGuardedByCond(&L, GuardPred, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by upper limit"; return None; } @@ -870,38 +882,46 @@ // while (--i != len) { while (--i > len) { // ... ---> ... // } } + // TODO: Insert ICMP_UGT if both are non-negative? Pred = ICmpInst::ICMP_SGT; else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeSMax(SE, RightSCEV)) { + !CanBeMax(SE, RightSCEV, /* SignedPredicate */ true)) { // while (true) { while (true) { // if (--i == len) ---> if (--i < len + 1) // break; break; // ... ... // } } + // TODO: Insert ICMP_ULT if both are non-negative? Pred = ICmpInst::ICMP_SLT; RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); IncreasedRightValueByOne = true; } + bool LTPred = (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_ULT); + bool GTPred = (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_UGT); + bool FoundExpectedPred = - (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || - (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); + (GTPred && LatchBrExitIdx == 1) || (LTPred && LatchBrExitIdx == 0); if (!FoundExpectedPred) { FailureReason = "expected icmp sgt semantically, found something else"; return None; } + SignedPredicate = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; + ICmpInst::Predicate GuardPred = + SignedPredicate ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + if (LatchBrExitIdx == 0) { - if (CanBeSMin(SE, RightSCEV)) { + if (CanBeMin(SE, RightSCEV, SignedPredicate)) { // TODO: this restriction is easily removable -- we just have to // remember that the icmp was an sgt and not an sge. - FailureReason = "limit may overflow when coercing sge to sgt"; + FailureReason = "limit may overflow when coercing ge to gt"; return None; } if (!SE.isLoopEntryGuardedByCond( - &L, CmpInst::ICMP_SGT, IndVarStart, + &L, GuardPred, IndVarStart, SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())))) { FailureReason = "Induction variable start not bounded by lower limit"; return None; @@ -914,8 +934,7 @@ RightValue = B.CreateSub(RightValue, One); } } else { - if (!SE.isLoopEntryGuardedByCond(&L, CmpInst::ICMP_SGT, IndVarStart, - RightSCEV)) { + if (!SE.isLoopEntryGuardedByCond(&L, GuardPred, IndVarStart, RightSCEV)) { FailureReason = "Induction variable start not bounded by lower limit"; return None; } @@ -923,7 +942,6 @@ "Right value can be increased only for LatchBrExitIdx == 0!"); } } - BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); assert(SE.getLoopDisposition(LatchCount, &L) == @@ -949,6 +967,7 @@ Result.IndVarNext = LeftValue; Result.IndVarIncreasing = IsIncreasing; Result.LoopExitAt = RightValue; + Result.SignedPredicate = SignedPredicate; FailureReason = nullptr; @@ -956,7 +975,7 @@ } Optional -LoopConstrainer::calculateSubRanges() const { +LoopConstrainer::calculateSubRanges(bool SignedPredicate) const { IntegerType *Ty = cast(LatchTakenCount->getType()); if (Range.getType() != Ty) @@ -983,38 +1002,53 @@ } else { // These two computations may sign-overflow. Here is why that is okay: // - // We know that the induction variable does not sign-overflow on any + // We know that the induction variable does not overflow on any // iteration except the last one, and it starts at `Start` and ends at // `End`, decrementing by one every time. // - // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the + // * if `Smallest` sign-overflows we know `End` is `INT_MAX`. Since the // induction variable is decreasing we know that that the smallest value - // the loop body is actually executed with is `INT_SMIN` == `Smallest`. + // the loop body is actually executed with is `INT_MIN` == `Smallest`. // - // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In + // * if `Greatest` sign-overflows, we know it can only be `INT_MIN`. In // that case, `Clamp` will always return `Smallest` and // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) // will be an empty range. Returning an empty range is always safe. // + if (!SignedPredicate) { + // FIXME: For some reason this logic fails for unsigned comparison when + // the start value is UINT_MAX. We don't generate pre-loop even if we need + // it in this situation. Once this is fixed, we can remove this check. + // See example in test/Transforms/IRCE/unsigned_comparisons.ll test_12. + if (CanBeMax(SE, Start, SignedPredicate)) { + Result.LowLimit = Range.getBegin(); + Result.HighLimit = Range.getEnd(); + return Result; + } + } const SCEV *One = SE.getOne(Ty); Smallest = SE.getAddExpr(End, One); Greatest = SE.getAddExpr(Start, One); } - auto Clamp = [this, Smallest, Greatest](const SCEV *S) { - return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)); + auto Clamp = [this, Smallest, Greatest, SignedPredicate](const SCEV *S) { + return SignedPredicate + ? SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)) + : SE.getUMaxExpr(Smallest, SE.getUMinExpr(Greatest, S)); }; - // In some cases we can prove that we don't need a pre or post loop + // In some cases we can prove that we don't need a pre or post loop. + ICmpInst::Predicate Pred = + SignedPredicate ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; bool ProvablyNoPreloop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest); + SE.isKnownPredicate(Pred, Range.getBegin(), Smallest); if (!ProvablyNoPreloop) Result.LowLimit = Clamp(Range.getBegin()); bool ProvablyNoPostLoop = - SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); + SE.isKnownPredicate(Pred, Greatest, Range.getEnd()); if (!ProvablyNoPostLoop) Result.HighLimit = Clamp(Range.getEnd()); @@ -1161,22 +1195,35 @@ BranchInst *PreheaderJump = cast(Preheader->getTerminator()); bool Increasing = LS.IndVarIncreasing; + bool SignedPredicate = LS.SignedPredicate; IRBuilder<> B(PreheaderJump); // EnterLoopCond - is it okay to start executing this `LS'? - Value *EnterLoopCond = Increasing - ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt) - : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt); + Value *EnterLoopCond; + if (Increasing) + EnterLoopCond = SignedPredicate + ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt) + : B.CreateICmpULT(LS.IndVarStart, ExitSubloopAt); + else + EnterLoopCond = SignedPredicate + ? B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt) + : B.CreateICmpUGT(LS.IndVarStart, ExitSubloopAt); B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); PreheaderJump->eraseFromParent(); LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); B.SetInsertPoint(LS.LatchBr); - Value *TakeBackedgeLoopCond = - Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt) - : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt); + Value *TakeBackedgeLoopCond; + if (Increasing) + TakeBackedgeLoopCond = SignedPredicate + ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt) + : B.CreateICmpULT(LS.IndVarNext, ExitSubloopAt); + else + TakeBackedgeLoopCond = SignedPredicate + ? B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt) + : B.CreateICmpUGT(LS.IndVarNext, ExitSubloopAt); Value *CondForBranch = LS.LatchBrExitIdx == 1 ? TakeBackedgeLoopCond : B.CreateNot(TakeBackedgeLoopCond); @@ -1188,9 +1235,15 @@ // IterationsLeft - are there any more iterations left, given the original // upper bound on the induction variable? If not, we branch to the "real" // exit. - Value *IterationsLeft = Increasing - ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt) - : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt); + Value *IterationsLeft; + if (Increasing) + IterationsLeft = SignedPredicate + ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt) + : B.CreateICmpULT(LS.IndVarNext, LS.LoopExitAt); + else + IterationsLeft = SignedPredicate + ? B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt) + : B.CreateICmpUGT(LS.IndVarNext, LS.LoopExitAt); B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); BranchInst *BranchToContinuation = @@ -1307,7 +1360,8 @@ OriginalPreheader = Preheader; MainLoopPreheader = Preheader; - Optional MaybeSR = calculateSubRanges(); + bool SignedPredicate = MainLoopStructure.SignedPredicate; + Optional MaybeSR = calculateSubRanges(SignedPredicate); if (!MaybeSR.hasValue()) { DEBUG(dbgs() << "irce: could not compute subranges\n"); return false; @@ -1341,7 +1395,7 @@ if (Increasing) ExitPreLoopAtSCEV = *SR.LowLimit; else { - if (CanBeSMin(SE, *SR.HighLimit)) { + if (CanBeMin(SE, *SR.HighLimit, SignedPredicate)) { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "preloop exit limit. HighLimit = " << *(*SR.HighLimit) << "\n"); @@ -1360,7 +1414,7 @@ if (Increasing) ExitMainLoopAtSCEV = *SR.HighLimit; else { - if (CanBeSMin(SE, *SR.LowLimit)) { + if (CanBeMin(SE, *SR.LowLimit, SignedPredicate)) { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit) << "\n"); Index: test/Transforms/IRCE/unsigned_comparisons.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/unsigned_comparisons.ll @@ -0,0 +1,427 @@ +; RUN: opt -verify-loop-info -irce-print-changed-loops -irce -S < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test_01: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_04: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_06: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_08: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_09: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_10: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_11: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_12: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK-NOT: irce: in function test_13: constrained Loop at depth 1 containing: %loop
,%in.bounds + +; ULT condition for increasing loop. +define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_01 +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 100 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; UGT condition for increasing loop. +define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_02( +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 101 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 100 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; UGT condition for decreasing loop. +define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_03( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ugt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 0 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; ULT condition for decreasing loops. +define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { +; CHECK: test_04( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ugt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 100, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 1 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check SINT_MAX. +define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_05 +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 2147483647 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 2147483647 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check SINT_MAX + 1, test is similar to test_01. +define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_06 +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], -2147483648 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 2147483648 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check SINT_MAX + 1, test is similar to test_02. +define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_07( +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], -2147483647 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 2147483648 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check SINT_MAX + 1, test is similar to test_03. +define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_08( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ugt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 2147483648, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 0 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Check SINT_MAX + 1, test is similar to test_04. +define void @test_09(i32* %arr, i32* %a_len_ptr) #0 { +; CHECK: test_09( +; CHECK: preloop.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ugt i32 [[PSEUDO_PHI]], 0 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 2147483648, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 1 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Increasing loop, UINT_MAX. Positive test. +define void @test_10(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_10 +; CHECK: main.exit.selector: +; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], -1 +; CHECK-NEXT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 4294967295 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Increasing loop, UINT_MAX. Negative test: we cannot add 1 to UINT_MAX. +define void @test_11(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_11( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 0, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nsw nuw i32 %idx, 1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 4294967295 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +; Decreasing loop, UINT_MAX. +; FIXME in calculateSubRanges relates to this test. It should be a positive +; test, but currently we cannot handle it. Negative checks should be replaced +; with check next once it is done. +define void @test_12(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_12( +; CHECK-NOT: preloop.exit.selector: +; CHECK-NOT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next.preloop, %in.bounds.preloop ] +; CHECK-NOT: [[COND:%[^ ]+]] = icmp ugt i32 [[PSEUDO_PHI]], 0 +; CHECK-NOT: br i1 [[COND]] + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 4294967295, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nuw i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ugt i32 %idx.next, 0 + br i1 %next, label %loop, label %exit + +out.of.bounds: + ret void + +exit: + ret void +} + +; Decreasing loop, UINT_MAX. Negative test: we cannot substract -1 from 0. +define void @test_13(i32* %arr, i32* %a_len_ptr) #0 { + +; CHECK: test_13( + +entry: + %len = load i32, i32* %a_len_ptr, !range !0 + br label %loop + +loop: + %idx = phi i32 [ 4294967295, %entry ], [ %idx.next, %in.bounds ] + %idx.next = add nuw i32 %idx, -1 + %abc = icmp ult i32 %idx, %len + br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 + +in.bounds: + %addr = getelementptr i32, i32* %arr, i32 %idx + store i32 0, i32* %addr + %next = icmp ult i32 %idx.next, 0 + br i1 %next, label %exit, label %loop + +out.of.bounds: + ret void + +exit: + ret void +} + +!0 = !{i32 0, i32 50} +!1 = !{!"branch_weights", i32 64, i32 4}