Index: llvm/include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- llvm/include/llvm/Analysis/ScalarEvolution.h
+++ llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1489,6 +1489,13 @@
   ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
                                     const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr {\p
+  /// Start,+,\p Stop}.
+  ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
+                                                  const SCEV *MaxBECount,
+                                                  unsigned BitWidth,
+                                                  RangeSignHint SignHint);
+
   /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
   /// Stop} by "factoring out" a ternary expression from the add recurrence.
   /// Helper called by \c getRange.
Index: llvm/lib/Analysis/ScalarEvolution.cpp
===================================================================
--- llvm/lib/Analysis/ScalarEvolution.cpp
+++ llvm/lib/Analysis/ScalarEvolution.cpp
@@ -5509,6 +5509,17 @@
       ConservativeResult =
           ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
     }
+
+    // Now try symbolic BE count and more powerful methods.
+    MaxBECount = computeMaxBackedgeTakenCount(AddRec->getLoop());
+    if (!isa<SCEVCouldNotCompute>(MaxBECount) &&
+        getTypeSizeInBits(MaxBECount->getType()) <= BitWidth &&
+        AddRec->hasNoSelfWrap()) {
+      auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
+          AddRec, MaxBECount, BitWidth, SignHint);
+      ConservativeResult =
+          ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
+    }
   }
 
   return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5678,6 +5689,83 @@
   return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
+ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
+    const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
+    ScalarEvolution::RangeSignHint SignHint) {
+  assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!");
+  assert(AddRec->hasNoSelfWrap() &&
+         "This only works for non-self-wrapping AddRecs!");
+  const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  // Let's make sure that we can prove that we do not self-wrap during
+  // MaxBECount iterations. We need this because MaxBECount is a maximum
+  // iteration count estimate, and we might have inferred nw from some exit
+  // for which we do not know the max exit count (or from other side
+  // reasoning).
+  // TODO: Turn into assert at some point.
+  MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
+  const SCEV *RangeWidth = getNegativeSCEV(getOne(AddRec->getType()));
+  const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
+  const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
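+  // For example, for an i8 AddRec with Step == -3 (253 as unsigned):
+  // StepAbs == umin(253, 3) == 3 and MaxItersWithoutWrap == 255 u/ 3 == 85,
+  // i.e. up to 85 steps of magnitude 3 cannot cross the full 2^8 range.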
+  if (!isKnownPredicate(ICmpInst::ICMP_ULE, MaxBECount, MaxItersWithoutWrap))
+    return ConstantRange::getFull(BitWidth);
+
+  ICmpInst::Predicate LEPred =
+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  ICmpInst::Predicate GEPred =
+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+  const SCEV *Start = AddRec->getStart();
+  const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+  // We know that there is no self-wrap. Let's take the Start and End values
+  // and look at all intermediate values V1, V2, ..., Vn that the IndVar takes
+  // during the iteration. They either lie inside the range [Min(Start, End),
+  // Max(Start, End)] or outside it:
+  //
+  // Case 1:   RangeMin   ...   Start V1 ... VN End ...           RangeMax;
+  // Case 2:   RangeMin Vk ... V1 Start    ...    End Vn ... Vk + 1 RangeMax;
+  //
+  // The no-self-wrap flag guarantees that the intermediate values cannot be
+  // BOTH outside and inside the range [Min(Start, End), Max(Start, End)].
+  // Using that knowledge, let's try to prove that we are dealing with Case 1.
+  // This is the case if both End and V1 lie on the same side of Start.
+  ConstantRange StartRange =
+      IsSigned ? getSignedRange(Start) : getUnsignedRange(Start);
+  ConstantRange EndRange =
+      IsSigned ? getSignedRange(End) : getUnsignedRange(End);
+  ConstantRange RangeBetween = StartRange.unionWith(EndRange);
+  // If they already cover the full iteration space, we will learn nothing
+  // useful even if we prove what we want to prove.
+  if (RangeBetween.isFullSet())
+    return RangeBetween;
+
+  // TODO: Overly large expressions here may lead to exponential explosion on
+  // recursion, so we limit the size of the operands to avoid this. Maybe in
+  // the future we should find a better way to deal with it.
+  const unsigned Threshold = 3;
+  if (Start->getExpressionSize() > Threshold ||
+      Step->getExpressionSize() > Threshold)
+    return ConstantRange::getFull(BitWidth);
+  const Loop *L = AddRec->getLoop();
+  const SCEV *V1 = getAddExpr(Start, Step);
+  const Instruction *Context = L->getLoopLatch()->getTerminator();
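+  // Prove that the value after the first iteration (V1 = Start + Step) lies
+  // on the same side of Start as End does: either the loop backedge is
+  // guarded by Pred(Start, V1), or Pred(Start, V1) is implied by the fact
+  // that Start != End still holds at the loop latch.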
+  auto ProveBetween = [&](ICmpInst::Predicate Pred) {
+    return isLoopBackedgeGuardedByCond(L, Pred, Start, V1) ||
+           isImpliedCond(Pred, Start, V1, ICmpInst::ICMP_NE, Start, End,
+                         Context);
+  };
+  if (isKnownPositive(Step) && isKnownPredicate(LEPred, Start, End)) {
+    if (!ProveBetween(LEPred))
+      return ConstantRange::getFull(BitWidth);
+  } else if (isKnownNegative(Step) && isKnownPredicate(GEPred, Start, End)) {
+    if (!ProveBetween(GEPred))
+      return ConstantRange::getFull(BitWidth);
+  } else
+    return ConstantRange::getFull(BitWidth);
+
+  // All intermediate values lie between Start and End.
+  return RangeBetween;
+}
+
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                     const SCEV *Step,
                                                     const SCEV *MaxBECount,
Index: llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
===================================================================
--- llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -7,7 +7,7 @@
 ; CHECK-NEXT:    %0 = zext i32 %start to i64
 ; CHECK-NEXT:    --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
 ; CHECK-NEXT:    %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
-; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
 ; CHECK-NEXT:    --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, -1
Index: llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
+++ llvm/test/Transforms/IndVarSimplify/X86/eliminate-trunc.ll
@@ -474,7 +474,7 @@
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
 ; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
-; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[UMIN]], -99
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[UMIN]], -99
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
Index: llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
+++ llvm/test/Transforms/IndVarSimplify/promote-iv-to-eliminate-casts.ll
@@ -241,7 +241,7 @@
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
@@ -285,7 +285,7 @@
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
@@ -336,7 +336,7 @@
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[TMP0]], [[PREHEADER]] ]
 ; CHECK-NEXT:    [[EL:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]]
 ; CHECK-NEXT:    store atomic i32 0, i32* [[EL]] unordered, align 4
-; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp slt i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[LOOPCOND:%.*]] = icmp ult i64 [[INDVARS_IV]], 1
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOPCOND]], label [[LOOPEXIT_LOOPEXIT:%.*]], label [[LOOP]]
 ;
Index: llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
===================================================================
--- llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
+++ llvm/test/Transforms/IndVarSimplify/widen-loop-comp.ll
@@ -550,13 +550,11 @@
 define i32 @test11(i32 %start, i32* %p, i32* %q) {
 ; CHECK-LABEL: @test11(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64
@@ -565,7 +563,6 @@
 ; CHECK-NEXT:    [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[STOP:%.*]] = load i32, i32* [[Q]], align 4
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
@@ -600,22 +597,19 @@
 define i32 @test12(i32 %start, i32* %p, i32* %q) {
 ; CHECK-LABEL: @test12(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[INDVARS_IV]], 0
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
+; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[IV]], 0
 ; CHECK-NEXT:    br i1 [[COND]], label [[EXIT:%.*]], label [[BACKEDGE]]
 ; CHECK:       backedge:
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT:    [[IV_NEXT:%.*]] = add i32 [[TMP1]], -1
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], -1
 ; CHECK-NEXT:    [[INDEX:%.*]] = zext i32 [[IV_NEXT]] to i64
 ; CHECK-NEXT:    [[STORE_ADDR:%.*]] = getelementptr i32, i32* [[P:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    store i32 1, i32* [[STORE_ADDR]], align 4
 ; CHECK-NEXT:    [[LOAD_ADDR:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDEX]]
 ; CHECK-NEXT:    [[STOP:%.*]] = load i32, i32* [[Q]], align 4
 ; CHECK-NEXT:    [[LOOP_COND:%.*]] = icmp eq i32 [[STOP]], 0
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br i1 [[LOOP_COND]], label [[LOOP]], label [[FAILURE:%.*]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret i32 0
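Note (illustration, not part of the patch): the interval argument implemented by
getRangeForAffineNoSelfWrappingAR can be sanity-checked on concrete 8-bit
values. The standalone C++ sketch below uses hypothetical local names and
hand-picked constants (Start = 100, Step = -3, MaxBECount = 30); it only
demonstrates that when MaxBECount <= (2^8 - 1) / |Step| (so the recurrence
cannot self-wrap) and the first iterate moves from Start towards End, every
intermediate value stays inside [min(Start, End), max(Start, End)]:

// no_self_wrap_sketch.cpp -- illustration only; all names are hypothetical.
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  // {Start,+,Step} over i8 with a negative step, evaluated as unsigned.
  const uint8_t Start = 100;
  const uint8_t Step = static_cast<uint8_t>(-3); // 253 as unsigned
  const unsigned MaxBECount = 30;

  // MaxItersWithoutWrap = (2^8 - 1) / |Step| = 255 / 3 = 85 >= MaxBECount,
  // so the recurrence cannot self-wrap within MaxBECount iterations.
  const uint8_t StepAbs = std::min<uint8_t>(Step, static_cast<uint8_t>(-Step));
  assert(MaxBECount <= 255u / StepAbs);

  // End = Start + MaxBECount * Step, with wrapping i8 arithmetic.
  const uint8_t End = static_cast<uint8_t>(Start + MaxBECount * Step);
  const uint8_t Lo = std::min(Start, End), Hi = std::max(Start, End);

  // Case 1 of the comment in the patch: every iterate stays in [Lo, Hi].
  for (unsigned It = 0; It <= MaxBECount; ++It) {
    const uint8_t V = static_cast<uint8_t>(Start + It * Step);
    assert(Lo <= V && V <= Hi);
  }
  std::printf("all iterates lie in [%u, %u]\n",
              static_cast<unsigned>(Lo), static_cast<unsigned>(Hi));
  return 0;
}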