Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -686,27 +686,59 @@ SE.getUnsignedRange(S).contains(Max); } -static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, - bool Signed) { - // S1 < INT_MAX - S2 ===> S1 + S2 < INT_MAX. - assert(SE.isKnownNonNegative(S2) && - "We expected the 2nd arg to be non-negative!"); - const SCEV *Max = SE.getConstant( - Signed ? APInt::getSignedMaxValue( - cast(S1->getType())->getBitWidth()) - : APInt::getMaxValue( - cast(S1->getType())->getBitWidth())); - const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2); - return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, - S1, CapForS1); +/// Given a loop with an increasing induction variable, is it possible to +/// safely calculate the bounds of a new loop using the given Predicate. +static bool isSafeIncreasingBound(const SCEV *Start, + const SCEV *BoundSCEV, const SCEV *Step, + ICmpInst::Predicate Pred, + unsigned LatchBrExitIdx, + Loop *L, ScalarEvolution &SE) { + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) + return false; + + if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) + return false; + + DEBUG(dbgs() << "irce: isSafeIncreasingBound with:\n"); + DEBUG(dbgs() << "irce: Start: "; Start->dump()); + DEBUG(dbgs() << "irce: Step: "; Step->dump()); + DEBUG(dbgs() << "irce: BoundSCEV: "; BoundSCEV->dump()); + DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred) << "\n"); + DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n"); + + bool IsSigned = ICmpInst::isSigned(Pred); + // The predicate that we need to check that the induction variable lies + // within bounds. + ICmpInst::Predicate BoundPred = + IsSigned ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; + + if (LatchBrExitIdx == 1) + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + + assert(LatchBrExitIdx == 0 && "LatchBrExitIdx should be 0 or 1"); + + const SCEV *StepMinusOne = + SE.getMinusSCEV(Step, SE.getOne(Step->getType())); + unsigned BitWidth = cast(BoundSCEV->getType())->getBitWidth(); + APInt Max = IsSigned ? APInt::getSignedMaxValue(BitWidth) : + APInt::getMaxValue(BitWidth); + const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Max), StepMinusOne); + + return (SE.isLoopEntryGuardedByCond(L, BoundPred, Start, + SE.getAddExpr(BoundSCEV, Step)) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit)); } -static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { - APInt Min = Signed ? - APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : - APInt::getMinValue(cast(S->getType())->getBitWidth()); - return SE.getSignedRange(S).contains(Min) && - SE.getUnsignedRange(S).contains(Min); +static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L, + ScalarEvolution &SE, bool Signed) { + unsigned BitWidth = cast(BoundSCEV->getType())->getBitWidth(); + APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : + APInt::getMinValue(BitWidth); + auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return SE.isAvailableAtLoopEntry(BoundSCEV, L) && + SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV, + SE.getConstant(Min)); } static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, @@ -887,17 +919,24 @@ Pred = ICmpInst::ICMP_ULT; else Pred = ICmpInst::ICMP_SLT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { // while (true) { while (true) { // if (++i == len) ---> if (++i > len - 1) // break; break; // ... ... // } } - // TODO: Insert ICMP_UGT if both are non-negative? - Pred = ICmpInst::ICMP_SGT; - RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); - DecreasedRightValueByOne = true; + if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) { + Pred = ICmpInst::ICMP_UGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) { + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } } } @@ -911,36 +950,18 @@ return None; } - IsSignedPredicate = - Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SGT; - + IsSignedPredicate = ICmpInst::isSigned(Pred); if (!IsSignedPredicate && !AllowUnsignedLatchCondition) { FailureReason = "unsigned latch conditions are explicitly prohibited"; return None; } - // The predicate that we need to check that the induction variable lies - // within bounds. - ICmpInst::Predicate BoundPred = - IsSignedPredicate ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT; - + if (!isSafeIncreasingBound(IndVarStart, RightSCEV, Step, Pred, + LatchBrExitIdx, &L, SE)) { + FailureReason = "Unsafe loop bounds"; + return None; + } if (LatchBrExitIdx == 0) { - const SCEV *StepMinusOne = SE.getMinusSCEV(Step, - SE.getOne(Step->getType())); - if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { - // TODO: this restriction is easily removable -- we just have to - // remember that the icmp was an slt and not an sle. - FailureReason = "limit may overflow when coercing le to lt"; - return None; - } - - if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || - !SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, - SE.getAddExpr(RightSCEV, Step))) { - FailureReason = "Induction variable start not bounded by upper limit"; - return None; - } - // We need to increase the right value unless we have already decreased // it virtually when we replaced EQ with SGT. if (!DecreasedRightValueByOne) { @@ -948,11 +969,6 @@ RightValue = B.CreateAdd(RightValue, One); } } else { - if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || - !SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, RightSCEV)) { - FailureReason = "Induction variable start not bounded by upper limit"; - return None; - } assert(!DecreasedRightValueByOne && "Right value can be decreased only for LatchBrExitIdx == 0!"); } @@ -1461,13 +1477,15 @@ if (Increasing) ExitPreLoopAtSCEV = *SR.LowLimit; else { - if (CanBeMin(SE, *SR.HighLimit, IsSignedPredicate)) { + if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE, + IsSignedPredicate)) + ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); + else { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "preloop exit limit. HighLimit = " << *(*SR.HighLimit) << "\n"); return false; } - ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); } if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) { @@ -1487,13 +1505,15 @@ if (Increasing) ExitMainLoopAtSCEV = *SR.HighLimit; else { - if (CanBeMin(SE, *SR.LowLimit, IsSignedPredicate)) { + if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE, + IsSignedPredicate)) + ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); + else { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit) << "\n"); return false; } - ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); } if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) { Index: test/Transforms/IRCE/eq_ne.ll =================================================================== --- test/Transforms/IRCE/eq_ne.ll +++ test/Transforms/IRCE/eq_ne.ll @@ -4,7 +4,7 @@ ; CHECK: irce: in function test_01u: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK-NOT: irce: in function test_02: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_03: constrained Loop at depth 1 containing: %loop
,%in.bounds -; CHECK-NOT: irce: in function test_04: constrained Loop at depth 1 containing: %loop
,%in.bounds +; CHECK: irce: in function test_04: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_05: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK-NOT: irce: in function test_06: constrained Loop at depth 1 containing: %loop
,%in.bounds ; CHECK: irce: in function test_07: constrained Loop at depth 1 containing: %loop
,%in.bounds @@ -111,7 +111,7 @@ ; CHECK: test_03( ; CHECK: main.exit.selector: ; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] -; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100 ; CHECK-NEXT: br i1 [[COND]] entry: @@ -137,12 +137,8 @@ ret void } -; Show that if n is not known to be greater than the starting value, IRCE -; doesn't apply. define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { -; CHECK: test_04( - entry: %len = load i32, i32* %a_len_ptr, !range !0 br label %loop Index: test/Transforms/IRCE/variable-loop-bounds.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/variable-loop-bounds.ll @@ -0,0 +1,153 @@ +; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test_inc_eq: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test_inc_eq(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp sgt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +; CHECK: irce: in function test_inc_ne: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test_inc_ne(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp sgt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp ne i32 %inc, %N + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK: irce: in function test_inc_slt: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test_inc_slt(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp sgt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp slt i32 %inc, %N + br i1 %exitcond, label %for.body, label %for.cond.cleanup +} + +; CHECK: irce: in function test_inc_ult: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test_inc_ult(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp ugt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp ult i32 %inc, %N + br i1 %exitcond, label %for.body, label %for.cond.cleanup +}