Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -701,12 +701,14 @@ S1, CapForS1); } -static bool CanBeMin(ScalarEvolution &SE, const SCEV *S, bool Signed) { - APInt Min = Signed ? - APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : - APInt::getMinValue(cast(S->getType())->getBitWidth()); - return SE.getSignedRange(S).contains(Min) && - SE.getUnsignedRange(S).contains(Min); +static bool CannotBeMinInLoop(const SCEV *BoundSCEV, Loop *L, + ScalarEvolution &SE, bool Signed) { + unsigned BitWidth = cast(BoundSCEV->getType())->getBitWidth(); + APInt Min = Signed ? APInt::getSignedMinValue(BitWidth) : + APInt::getMinValue(BitWidth); + auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV, + SE.getConstant(Min)); } static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, @@ -887,17 +889,24 @@ Pred = ICmpInst::ICMP_ULT; else Pred = ICmpInst::ICMP_SLT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { // while (true) { while (true) { // if (++i == len) ---> if (++i > len - 1) // break; break; // ... ... // } } - // TODO: Insert ICMP_UGT if both are non-negative? - Pred = ICmpInst::ICMP_SGT; - RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); - DecreasedRightValueByOne = true; + if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/false)) { + Pred = ICmpInst::ICMP_UGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } else if (CannotBeMinInLoop(RightSCEV, &L, SE, /*Signed*/true)) { + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } } } @@ -928,10 +937,10 @@ const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { - // TODO: this restriction is easily removable -- we just have to - // remember that the icmp was an slt and not an sle. - FailureReason = "limit may overflow when coercing le to lt"; - return None; + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE) { + FailureReason = "limit may overflow when coercing le to lt"; + return None; + } } if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || @@ -1461,13 +1470,15 @@ if (Increasing) ExitPreLoopAtSCEV = *SR.LowLimit; else { - if (CanBeMin(SE, *SR.HighLimit, IsSignedPredicate)) { + if (CannotBeMinInLoop(*SR.HighLimit, &OriginalLoop, SE, + IsSignedPredicate)) { + ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); + } else { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "preloop exit limit. HighLimit = " << *(*SR.HighLimit) << "\n"); return false; } - ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); } if (!isSafeToExpandAt(ExitPreLoopAtSCEV, InsertPt, SE)) { @@ -1487,13 +1498,15 @@ if (Increasing) ExitMainLoopAtSCEV = *SR.HighLimit; else { - if (CanBeMin(SE, *SR.LowLimit, IsSignedPredicate)) { + if (CannotBeMinInLoop(*SR.LowLimit, &OriginalLoop, SE, + IsSignedPredicate)) { + ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); + } else { DEBUG(dbgs() << "irce: could not prove no-overflow when computing " << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit) << "\n"); return false; } - ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); } if (!isSafeToExpandAt(ExitMainLoopAtSCEV, InsertPt, SE)) { @@ -1788,6 +1801,7 @@ << "\n";); return false; } + LoopStructure LS = MaybeLoopStructure.getValue(); const SCEVAddRecExpr *IndVar = cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep))); Index: test/Transforms/IRCE/eq_ne.ll =================================================================== --- test/Transforms/IRCE/eq_ne.ll +++ test/Transforms/IRCE/eq_ne.ll @@ -111,7 +111,7 @@ ; CHECK: test_03( ; CHECK: main.exit.selector: ; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] -; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100 ; CHECK-NEXT: br i1 [[COND]] entry: @@ -144,7 +144,7 @@ ; CHECK: test_04( entry: - %len = load i32, i32* %a_len_ptr, !range !0 + %len = load i32, i32* %a_len_ptr br label %loop loop: Index: test/Transforms/IRCE/variable-loop-bounds.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/variable-loop-bounds.ll @@ -0,0 +1,40 @@ +; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test01: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test01(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp sgt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} +