Index: lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp =================================================================== --- lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp +++ lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp @@ -694,12 +694,61 @@ PN->setIncomingBlock(i, ReplaceBy); } -static bool CanBeMax(ScalarEvolution &SE, const SCEV *S, bool Signed) { - APInt Max = Signed ? - APInt::getSignedMaxValue(cast(S->getType())->getBitWidth()) : - APInt::getMaxValue(cast(S->getType())->getBitWidth()); - return SE.getSignedRange(S).contains(Max) && - SE.getUnsignedRange(S).contains(Max); +static bool CannotBeMaxInLoop(const SCEV *BoundSCEV, Loop *L, + ScalarEvolution &SE, bool Signed) { + unsigned BitWidth = cast(BoundSCEV->getType())->getBitWidth(); + APInt Max = Signed ? APInt::getSignedMaxValue(BitWidth) : + APInt::getMaxValue(BitWidth); + auto Predicate = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; + return SE.isAvailableAtLoopEntry(BoundSCEV, L) && + SE.isLoopEntryGuardedByCond(L, Predicate, BoundSCEV, + SE.getConstant(Max)); +} + +/// Given a loop with an deccreasing induction variable, is it possible to +/// safely calculate the bounds of a new loop using the given Predicate. +static bool isSafeDecreasingBound(const SCEV *Start, + const SCEV *BoundSCEV, const SCEV *Step, + ICmpInst::Predicate Pred, + unsigned LatchBrExitIdx, + Loop *L, ScalarEvolution &SE) { + if (Pred != ICmpInst::ICMP_SLT && Pred != ICmpInst::ICMP_SGT && + Pred != ICmpInst::ICMP_ULT && Pred != ICmpInst::ICMP_UGT) + return false; + + if (!SE.isAvailableAtLoopEntry(BoundSCEV, L)) + return false; + + DEBUG(dbgs() << "irce: isSafeDecreasingBound with:\n"); + DEBUG(dbgs() << "irce: Start: " << *Start << "\n"); + DEBUG(dbgs() << "irce: Step: " << *Step << "\n"); + DEBUG(dbgs() << "irce: BoundSCEV: " << *BoundSCEV << "\n"); + DEBUG(dbgs() << "irce: Pred: " << ICmpInst::getPredicateName(Pred) << "\n"); + DEBUG(dbgs() << "irce: LatchExitBrIdx: " << LatchBrExitIdx << "\n"); + + bool IsSigned = ICmpInst::isSigned(Pred); + // The predicate that we need to check that the induction variable lies + // within bounds. + ICmpInst::Predicate BoundPred = + IsSigned ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + + if (LatchBrExitIdx == 1) + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, BoundSCEV); + + assert(LatchBrExitIdx == 0 && + "LatchBrExitIdx should be either 0 or 1"); + + const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); + unsigned BitWidth = cast(BoundSCEV->getType())->getBitWidth(); + APInt Min = IsSigned ? APInt::getSignedMinValue(BitWidth) : + APInt::getMinValue(BitWidth); + const SCEV *Limit = SE.getMinusSCEV(SE.getConstant(Min), StepPlusOne); + + const SCEV *MinusOne = + SE.getMinusSCEV(BoundSCEV, SE.getOne(BoundSCEV->getType())); + + return SE.isLoopEntryGuardedByCond(L, BoundPred, Start, MinusOne) && + SE.isLoopEntryGuardedByCond(L, BoundPred, BoundSCEV, Limit); } static bool SumCanReachMax(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, @@ -725,21 +774,6 @@ SE.getUnsignedRange(S).contains(Min); } -static bool SumCanReachMin(ScalarEvolution &SE, const SCEV *S1, const SCEV *S2, - bool Signed) { - // S1 > INT_MIN - S2 ===> S1 + S2 > INT_MIN. - assert(SE.isKnownNonPositive(S2) && - "We expected the 2nd arg to be non-positive!"); - const SCEV *Max = SE.getConstant( - Signed ? APInt::getSignedMinValue( - cast(S1->getType())->getBitWidth()) - : APInt::getMinValue( - cast(S1->getType())->getBitWidth())); - const SCEV *CapForS1 = SE.getMinusSCEV(Max, S2); - return !SE.isKnownPredicate(Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, - S1, CapForS1); -} - Optional LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo *BPI, Loop &L, @@ -985,17 +1019,22 @@ // that both operands are non-negative, because it will only pessimize // our check against "RightSCEV - 1". Pred = ICmpInst::ICMP_SGT; - else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0 && - !CanBeMax(SE, RightSCEV, /* IsSignedPredicate */ true)) { + else if (Pred == ICmpInst::ICMP_EQ && LatchBrExitIdx == 0) { // while (true) { while (true) { // if (--i == len) ---> if (--i < len + 1) // break; break; // ... ... // } } - // TODO: Insert ICMP_ULT if both are non-negative? - Pred = ICmpInst::ICMP_SLT; - RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); - IncreasedRightValueByOne = true; + if (IndVarBase->getNoWrapFlags(SCEV::FlagNUW) && + CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ false)) { + Pred = ICmpInst::ICMP_ULT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } else if (CannotBeMaxInLoop(RightSCEV, &L, SE, /* Signed */ true)) { + Pred = ICmpInst::ICMP_SLT; + RightSCEV = SE.getAddExpr(RightSCEV, SE.getOne(RightSCEV->getType())); + IncreasedRightValueByOne = true; + } } } @@ -1018,28 +1057,13 @@ return None; } - // The predicate that we need to check that the induction variable lies - // within bounds. - ICmpInst::Predicate BoundPred = - IsSignedPredicate ? CmpInst::ICMP_SGT : CmpInst::ICMP_UGT; + if (!isSafeDecreasingBound(IndVarStart, RightSCEV, Step, Pred, + LatchBrExitIdx, &L, SE)) { + FailureReason = "Unsafe bounds"; + return None; + } if (LatchBrExitIdx == 0) { - const SCEV *StepPlusOne = SE.getAddExpr(Step, SE.getOne(Step->getType())); - if (SumCanReachMin(SE, RightSCEV, StepPlusOne, IsSignedPredicate)) { - // TODO: this restriction is easily removable -- we just have to - // remember that the icmp was an sgt and not an sge. - FailureReason = "limit may overflow when coercing ge to gt"; - return None; - } - - if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || - !SE.isLoopEntryGuardedByCond( - &L, BoundPred, IndVarStart, - SE.getMinusSCEV(RightSCEV, SE.getOne(RightSCEV->getType())))) { - FailureReason = "Induction variable start not bounded by lower limit"; - return None; - } - // We need to decrease the right value unless we have already increased // it virtually when we replaced EQ with SLT. if (!IncreasedRightValueByOne) { @@ -1047,11 +1071,6 @@ RightValue = B.CreateSub(RightValue, One); } } else { - if (!SE.isAvailableAtLoopEntry(RightSCEV, &L) || - !SE.isLoopEntryGuardedByCond(&L, BoundPred, IndVarStart, RightSCEV)) { - FailureReason = "Induction variable start not bounded by lower limit"; - return None; - } assert(!IncreasedRightValueByOne && "Right value can be increased only for LatchBrExitIdx == 0!"); } Index: test/Transforms/IRCE/variable-loop-bounds.ll =================================================================== --- /dev/null +++ test/Transforms/IRCE/variable-loop-bounds.ll @@ -0,0 +1,181 @@ +; RUN: opt -irce -S -verify-loop-info -irce-print-changed-loops -irce-skip-profitability-checks %s < %s 2>&1 | FileCheck %s +; CHECK: irce: in function signed_var_imm_dec_sgt: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc +; CHECK-NOT: irce: in function signed_var_imm_dec_slt: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc +; CHECK: irce: in function signed_var_imm_dec_sge: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc +; CHECK: irce: in function signed_var_imm_dec_ne: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc +; CHECK-NOT: irce: in function signed_var_imm_dec_eq: constrained Loop at depth 1 containing: %for.body
,%if.else,%for.inc + +; CHECK-LABEL: signed_var_imm_dec_sgt( +; CHECK: main.exit.selector: +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %dec, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], %M +; CHECK: br i1 [[COND]] +define void @signed_var_imm_dec_sgt(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %M) { +entry: + %cmp14 = icmp slt i32 %M, 1024 + br i1 %cmp14, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc, %entry + ret void + +for.body: ; preds = %entry, %for.inc + %iv = phi i32 [ %dec, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp slt i32 %iv, 1024 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %iv + br i1 %cmp1, label %for.inc, label %if.else + +if.else: ; preds = %for.body + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %2, %mul + br label %for.inc + +for.inc: ; preds = %for.body, %if.else + %storemerge = phi i32 [ %add, %if.else ], [ %mul, %for.body ] + store i32 %storemerge, i32* %arrayidx3, align 4 + %dec = add nsw i32 %iv, -1 + %cmp = icmp sgt i32 %dec, %M + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +; CHECK-LABEL: signed_var_imm_dec_sge( +; CHECK: main.exit.selector: ; preds = %for.inc +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %iv, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], %M +; CHECK: br i1 [[COND]] +define void @signed_var_imm_dec_sge(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %M) { +entry: + %cmp14 = icmp sgt i32 %M, 1024 + br i1 %cmp14, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.inc, %entry + ret void + +for.body: ; preds = %entry, %for.inc + %iv = phi i32 [ %dec, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp slt i32 %iv, 1024 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %iv + br i1 %cmp1, label %for.inc, label %if.else + +if.else: ; preds = %for.body + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %2, %mul + br label %for.inc + +for.inc: ; preds = %for.body, %if.else + %storemerge = phi i32 [ %add, %if.else ], [ %mul, %for.body ] + store i32 %storemerge, i32* %arrayidx3, align 4 + %dec = add nsw i32 %iv, -1 + %cmp = icmp sgt i32 %iv, %M + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +define void @signed_var_imm_dec_slt(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %M) { +entry: + %cmp14 = icmp sgt i32 %M, 1024 + br i1 %cmp14, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.inc, %entry + ret void + +for.body: ; preds = %entry, %for.inc + %iv = phi i32 [ %dec, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp slt i32 %iv, 1024 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %iv + br i1 %cmp1, label %for.inc, label %if.else + +if.else: ; preds = %for.body + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %2, %mul + br label %for.inc + +for.inc: ; preds = %for.body, %if.else + %storemerge = phi i32 [ %add, %if.else ], [ %mul, %for.body ] + store i32 %storemerge, i32* %arrayidx3, align 4 + %dec = add nsw i32 %iv, -1 + %cmp = icmp slt i32 %iv, %M + br i1 %cmp, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: signed_var_imm_dec_ne( +; CHECK: main.exit.selector: ; preds = %for.inc +; CHECK: [[PSEUDO_PHI:%[^ ]+]] = phi i32 [ %dec, %for.inc ] +; CHECK: [[COND:%[^ ]+]] = icmp sgt i32 [[PSEUDO_PHI]], %M +; CHECK: br i1 [[COND]] +define void @signed_var_imm_dec_ne(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %M) { +entry: + %cmp14 = icmp slt i32 %M, 1024 + br i1 %cmp14, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc, %entry + ret void + +for.body: ; preds = %entry, %for.inc + %iv = phi i32 [ %dec, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp slt i32 %iv, 1024 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %iv + br i1 %cmp1, label %for.inc, label %if.else + +if.else: ; preds = %for.body + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %2, %mul + br label %for.inc + +for.inc: ; preds = %for.body, %if.else + %storemerge = phi i32 [ %add, %if.else ], [ %mul, %for.body ] + store i32 %storemerge, i32* %arrayidx3, align 4 + %dec = add nsw i32 %iv, -1 + %cmp = icmp ne i32 %dec, %M + br i1 %cmp, label %for.body, label %for.cond.cleanup +} + +define void @signed_var_imm_dec_eq(i32* nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %c, i32 %M) { +entry: + %cmp14 = icmp slt i32 %M, 1024 + br i1 %cmp14, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.inc, %entry + ret void + +for.body: ; preds = %entry, %for.inc + %iv = phi i32 [ %dec, %for.inc ], [ 1024, %entry ] + %cmp1 = icmp slt i32 %iv, 1024 + %arrayidx = getelementptr inbounds i32, i32* %b, i32 %iv + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %c, i32 %iv + %1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %1, %0 + %arrayidx3 = getelementptr inbounds i32, i32* %a, i32 %iv + br i1 %cmp1, label %for.inc, label %if.else + +if.else: ; preds = %for.body + %2 = load i32, i32* %arrayidx3, align 4 + %add = add nsw i32 %2, %mul + br label %for.inc + +for.inc: ; preds = %for.body, %if.else + %storemerge = phi i32 [ %add, %if.else ], [ %mul, %for.body ] + store i32 %storemerge, i32* %arrayidx3, align 4 + %dec = add nsw i32 %iv, -1 + %cmp = icmp eq i32 %dec, %M + br i1 %cmp, label %for.cond.cleanup, label %for.body +}