Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -308,6 +308,7 @@ ICmpInst::Predicate Pred = ICI->getPredicate(); Value *LHS = ICI->getOperand(0); Value *RHS = ICI->getOperand(1); + DEBUG(dbgs() << "irce: parse range check on "; ICI->dump()); switch (Pred) { default: @@ -709,6 +710,15 @@ SE.getUnsignedRange(S).contains(Min); } +static bool LoopGuardedAgainstMin(Loop *L, ScalarEvolution &SE, const SCEV *S, + bool Signed) { + APInt Min = Signed ? + APInt::getSignedMinValue(cast(S->getType())->getBitWidth()) : + APInt::getMinValue(cast(S->getType())->getBitWidth()); + auto Predicate = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; + return SE.isLoopEntryGuardedByCond(L, Predicate, S, SE.getConstant(Min)); +} + static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, bool Signed) { // S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN. @@ -865,6 +875,7 @@ return None; } + DEBUG(dbgs() << "irce: IndVarBase = "; IndVarBase->dump()); const SCEV *StartNext = IndVarBase->getStart(); const SCEV *Addend = SE.getNegativeSCEV(IndVarBase->getStepRecurrence(SE)); const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); @@ -887,17 +898,26 @@ Pred = ICmpInst::ICMP_ULT; else Pred = ICmpInst::ICMP_SLT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMin(SE, RightSCEV, /* IsSignedPredicate */ true)) { + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { // while (true) { while (true) { // if (++i == len) ---> if (++i > len - 1) // break; break; // ... ... // } } - // TODO: Insert ICMP_UGT if both are non-negative? - Pred = ICmpInst::ICMP_SGT; - RightSCEV = SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())); - DecreasedRightValueByOne = true; + // IndVarStart is non negative and we're increasing with no wrap. + if (SE.isKnownNonNegative(IndVarStart) && + IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + LoopGuardedAgainstMin(&L, SE, RightSCEV, false)) { + Pred = ICmpInst::ICMP_UGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } else if (LoopGuardedAgainstMin(&L, SE, RightSCEV, true)) { + Pred = ICmpInst::ICMP_SGT; + RightSCEV = SE.getMinusSCEV(RightSCEV, + SE.getOne(RightSCEV->getType())); + DecreasedRightValueByOne = true; + } } } @@ -928,10 +948,10 @@ const SCEV *StepMinusOne = SE.getMinusSCEV(Step, SE.getOne(Step->getType())); if (SumCanReachMax(SE, RightSCEV, StepMinusOne, IsSignedPredicate)) { - // TODO: this restriction is easily removable -- we just have to - // remember that the icmp was an slt and not an sle. - FailureReason = "limit may overflow when coercing le to lt"; - return None; + if (Pred == ICmpInst::ICMP_SLE || Pred == ICmpInst::ICMP_ULE) { + FailureReason = "limit may overflow when coercing le to lt"; + return None; + } } if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || @@ -1764,8 +1784,10 @@ InductiveRangeCheck::extractRangeChecksFromBranch(TBI, L, SE, BPI, RangeChecks); - if (RangeChecks.empty()) + if (RangeChecks.empty()) { + DEBUG(dbgs() << "irce: empty range checks.\n"); return false; + } auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) { OS << "irce: looking at loop "; L->print(OS); @@ -1788,6 +1810,8 @@ << "\n";); return false; } + DEBUG(dbgs() << "irce: loop structure parsed.\n"); + LoopStructure LS = MaybeLoopStructure.getValue(); const SCEVAddRecExpr *IndVar = cast(SE.getMinusSCEV(SE.getSCEV(LS.IndVarBase), SE.getSCEV(LS.IndVarStep))); @@ -1820,8 +1844,10 @@ } } - if (!SafeIterRange.hasValue()) + if (!SafeIterRange.hasValue()) { + DEBUG(dbgs() << "irce: unsafe iterator range.\n"); return false; + } auto &DT = getAnalysis().getDomTree(); LoopConstrainer LC(*L, getAnalysis().getLoopInfo(), LPM, Index: test/Transforms/IRCE/eq_ne.ll =================================================================== --- test/Transforms/IRCE/eq_ne.ll +++ test/Transforms/IRCE/eq_ne.ll @@ -111,7 +111,7 @@ ; CHECK: test_03( ; CHECK: main.exit.selector: ; CHECK-NEXT: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %idx.next, %in.bounds ] -; CHECK-NEXT: [[COND:%[^ ]+]] = icmp slt i32 [[PSEUDO_PHI]], 100 +; CHECK-NEXT: [[COND:%[^ ]+]] = icmp ult i32 [[PSEUDO_PHI]], 100 ; CHECK-NEXT: br i1 [[COND]] entry: @@ -144,7 +144,7 @@ ; CHECK: test_04( entry: - %len = load i32, i32* %a_len_ptr, !range !0 + %len = load i32, i32* %a_len_ptr br label %loop loop: Index: test/Transforms/IRCE/variable-loop-bounds.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/variable-loop-bounds.ll @@ -0,0 +1,40 @@ +; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks < %s 2>&1 | FileCheck %s + +; CHECK: irce: in function test01: constrained Loop at depth 1 containing: %for.body
,%if.else,%if.then,%for.inc +define void @test01(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %N) { +entry: + %cmp16 = icmp sgt i32 %N, 0 + br i1 %cmp16, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret void + +for.body: + %i.017 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] + %cmp1 = icmp ult i32 %i.017, 512 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %i.017 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %i.017 + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp1, label %if.then, label %if.else + +if.then: + %sub = sub i32 %0, %1 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %i.017 + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %sub, %2 + store i32 %add, i32* %arrayidx3, align 4 + br label %for.inc + +if.else: + %add6 = add nsw i32 %1, %0 + %arrayidx7 = getelementptr inbounds i32, i32* %a, i32 %i.017 + store i32 %add6, i32* %arrayidx7, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.017, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} +