Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -308,6 +308,7 @@ ICmpInst::Predicate Pred = ICI->getPredicate(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); + DEBUG(dbgs() << "irce: parse range check on "; ICI->dump()); switch (Pred) { default: @@ -709,6 +710,15 @@ SE.getUnsignedRange(S).contains(Min); } +static bool LoopGuardedAgainstMin(Loop *L, ScalarEvolution &SE, const SCEV *S, + bool Signed) { + APInt Min = Signed ? + APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : + APInt::getMinValue(cast(S->getType())->getBitWidth()); + auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return SE.isLoopEntryGuardedByCond(L, Predicate, S, SE.getConstant(Min)); +} + static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, bool Signed) { // S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN. @@ -865,6 +875,7 @@ return None; } + DEBUG(dbgs() << "irce: IndVarBase = "; IndVarBase->dump()); const SCEV *StartNext = IndVarBase->getStart(); const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); @@ -887,17 +898,26 @@ Pred = ICmpInst::ICMP_ULT; else Pred = ICmpInst::ICMP_SLT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { // while (true) { while (true) { // if (++i == len) ---> if (++i > len - 1) // break; break; // ... ... // } } - // TODO: Insert ICMP_UGT if both are non-negative? - Pred = ICmpInst::ICMP_SGT; - RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); - DecreasedRightValueByOne = true; + // IndVarStart is non negative and we're increasing with no wrap. + if (SE.isKnownNonNegative(IndVarStart) && + IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + LoopGuardedAgainstMin(&L, SE, RightSCEV, false)) { + Pred = ICmpInst::ICMP_UGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } else if (LoopGuardedAgainstMin(&L, SE, RightSCEV, true)) { + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } } } @@ -928,10 +948,10 @@ const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { - // TODO: this restriction is easily removable -- we just have to - // remember that the icmp was an slt and not an sle. - FailureReason = "limit may overflow when coercing le to lt"; - return None; + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE) { + FailureReason = "limit may overflow when coercing le to lt"; + return None; + } } if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || @@ -1764,8 +1784,10 @@ InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI, RangeChecks); - if (RangeChecks.empty()) + if (RangeChecks.empty()) { + DEBUG(dbgs() << "irce: empty range checks.\n"); return false; + } auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) { OS << "irce: looking at loop "; L->print(OS); @@ -1788,6 +1810,8 @@ << "\n";); return false; } + DEBUG(dbgs() << "irce: loop structure parsed.\n"); + LoopStructure LS = MaybeLoopStructure.getValue(); const SCEVAddRecExpr *IndVar = cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep))); @@ -1820,8 +1844,10 @@ } } - if (!SafeIterRange.hasValue()) + if (!SafeIterRange.hasValue()) { + DEBUG(dbgs() << "irce: unsafe iterator range.\n"); return false; + } auto &DT = getAnalysis().getDomTree(); LoopConstrainer LC(*L, getAnalysis().getLoopInfo(), LPM, Index: test/Transforms/IRCE/eq_ne.ll =================================================================== --- test/Transforms/IRCE/eq_ne.ll +++ test/Transforms/IRCE/eq_ne.ll @@ -111,7 +111,7 @@ ; CHECK: test_03( ; CHECK: main.exit.selector: ; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] -; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100 ; CHECK-NEXT: br i1 [[COND]] entry: @@ -144,7 +144,7 @@ ; CHECK: test_04( entry: - %len = load i32, i32* %a_len_ptr, !range !0 + %len = load i32, i32* %a_len_ptr br label %loop loop: