diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1507,6 +1507,13 @@
   ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
                                     const SCEV *MaxBECount, unsigned BitWidth);
 
+  /// Determines the range for the affine non-self-wrapping SCEVAddRecExpr
+  /// \p AddRec, given the maximum backedge-taken count \p MaxBECount.
+  ConstantRange getRangeForAffineNoSelfWrappingAR(const SCEVAddRecExpr *AddRec,
+                                                  const SCEV *MaxBECount,
+                                                  unsigned BitWidth,
+                                                  RangeSignHint SignHint);
+
   /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
   /// Stop} by "factoring out" a ternary expression from the add recurrence.
   /// Helper called by \c getRange.
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -226,6 +226,11 @@
     cl::Hidden, cl::init(true),
     cl::desc("When printing analysis, include information on every instruction"));
 
+static cl::opt<bool> UseExpensiveRangeSharpening(
+    "scalar-evolution-use-expensive-range-sharpening", cl::Hidden,
+    cl::init(false),
+    cl::desc("Use more powerful methods of sharpening expression ranges. May "
+             "be costly in terms of compile time"));
 
 //===----------------------------------------------------------------------===//
 //                      SCEV class definitions
@@ -5527,6 +5532,20 @@
         ConservativeResult =
             ConservativeResult.intersectWith(RangeFromFactoring, RangeType);
       }
+
+      // Now try symbolic BE count and more powerful methods.
+      if (UseExpensiveRangeSharpening) {
+        const SCEV *SymbolicMaxBECount =
+            getSymbolicMaxBackedgeTakenCount(AddRec->getLoop());
+        if (!isa<SCEVCouldNotCompute>(SymbolicMaxBECount) &&
+            getTypeSizeInBits(SymbolicMaxBECount->getType()) <= BitWidth &&
+            AddRec->hasNoSelfWrap()) {
+          auto RangeFromAffineNew = getRangeForAffineNoSelfWrappingAR(
+              AddRec, SymbolicMaxBECount, BitWidth, SignHint);
+          ConservativeResult =
+              ConservativeResult.intersectWith(RangeFromAffineNew, RangeType);
+        }
+      }
     }
 
     return setRange(AddRec, SignHint, std::move(ConservativeResult));
@@ -5696,6 +5715,71 @@
   return SR.intersectWith(UR, ConstantRange::Smallest);
 }
 
+ConstantRange ScalarEvolution::getRangeForAffineNoSelfWrappingAR(
+    const SCEVAddRecExpr *AddRec, const SCEV *MaxBECount, unsigned BitWidth,
+    ScalarEvolution::RangeSignHint SignHint) {
+  assert(AddRec->isAffine() && "Non-affine AddRecs are not supported!\n");
+  assert(AddRec->hasNoSelfWrap() &&
+         "This only works for non-self-wrapping AddRecs!");
+  const bool IsSigned = SignHint == HINT_RANGE_SIGNED;
+  const SCEV *Step = AddRec->getStepRecurrence(*this);
+  // Only deal with constant step to save compile time.
+  if (!isa<SCEVConstant>(Step))
+    return ConstantRange::getFull(BitWidth);
+  // Let's make sure that we can prove that we do not self-wrap during
+  // MaxBECount iterations. We need this because MaxBECount is a maximum
+  // iteration count estimate, and we might infer nw from some exit for which we
+  // do not know max exit count (or any other side reasoning).
+  // TODO: Turn into assert at some point.
+  MaxBECount = getNoopOrZeroExtend(MaxBECount, AddRec->getType());
+  const SCEV *RangeWidth = getMinusOne(AddRec->getType());
+  const SCEV *StepAbs = getUMinExpr(Step, getNegativeSCEV(Step));
+  const SCEV *MaxItersWithoutWrap = getUDivExpr(RangeWidth, StepAbs);
+  if (!isKnownPredicateViaConstantRanges(ICmpInst::ICMP_ULE, MaxBECount,
+                                         MaxItersWithoutWrap))
+    return ConstantRange::getFull(BitWidth);
+
+  ICmpInst::Predicate LEPred =
+      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
+  ICmpInst::Predicate GEPred =
+      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
+  const SCEV *End = AddRec->evaluateAtIteration(MaxBECount, *this);
+
+  // We know that there is no self-wrap. Let's take Start and End values and
+  // look at all intermediate values V1, V2, ..., Vn that IndVar takes during
+  // the iteration. They either lie inside the range [Min(Start, End),
+  // Max(Start, End)] or outside it:
+  //
+  //   Case 1: RangeMin    ...    Start V1 ... VN End ...           RangeMax;
+  //   Case 2: RangeMin Vk ... V1 Start    ...    End Vn ... Vk + 1 RangeMax;
+  //
+  // No self wrap flag guarantees that the intermediate values cannot be BOTH
+  // outside and inside the range [Min(Start, End), Max(Start, End)]. Using that
+  // knowledge, let's try to prove that we are dealing with Case 1. It is so if
+  // Start <= End and step is positive, or Start >= End and step is negative.
+  const SCEV *Start = AddRec->getStart();
+  ConstantRange StartRange = getRangeRef(Start, SignHint);
+  ConstantRange EndRange = getRangeRef(End, SignHint);
+  ConstantRange RangeBetween = StartRange.unionWith(EndRange);
+  // If they already cover full iteration space, we will know nothing useful
+  // even if we prove what we want to prove.
+  if (RangeBetween.isFullSet())
+    return RangeBetween;
+  // Only deal with ranges that do not wrap (i.e. RangeMin < RangeMax).
+  bool IsWrappedSet = IsSigned ? RangeBetween.isSignWrappedSet()
+                               : RangeBetween.isWrappedSet();
+  if (IsWrappedSet)
+    return ConstantRange::getFull(BitWidth);
+
+  if (isKnownPositive(Step) &&
+      isKnownPredicateViaConstantRanges(LEPred, Start, End))
+    return RangeBetween;
+  if (isKnownNegative(Step) &&
+      isKnownPredicateViaConstantRanges(GEPred, Start, End))
+    return RangeBetween;
+  return ConstantRange::getFull(BitWidth);
+}
+
 ConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start,
                                                     const SCEV *Step,
                                                     const SCEV *MaxBECount,
diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
--- a/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
+++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-symbolic-becount.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -S -analyze -scalar-evolution -enable-new-pm=0 | FileCheck %s
-; RUN: opt < %s -S -passes='print<scalar-evolution>' 2>&1 | FileCheck %s
+; RUN: opt < %s -S -scalar-evolution-use-expensive-range-sharpening -analyze -scalar-evolution -enable-new-pm=0 | FileCheck %s
+; RUN: opt < %s -S -scalar-evolution-use-expensive-range-sharpening -passes='print<scalar-evolution>' 2>&1 | FileCheck %s
 
 define i32 @test_01(i32 %start, i32* %p, i32* %q) {
 ; CHECK-LABEL: 'test_01'
@@ -8,7 +8,7 @@
 ; CHECK-NEXT:    %0 = zext i32 %start to i64
 ; CHECK-NEXT:    --> (zext i32 %start to i64) U: [0,4294967296) S: [0,4294967296)
 ; CHECK-NEXT:    %indvars.iv = phi i64 [ %indvars.iv.next, %backedge ], [ %0, %entry ]
-; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<%loop> U: [-4294967295,4294967296) S: [-4294967295,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(zext i32 %start to i64),+,-1}<%loop> U: [0,4294967296) S: [0,4294967296) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv = phi i32 [ %start, %entry ], [ %iv.next, %backedge ]
 ; CHECK-NEXT:    --> {%start,+,-1}<%loop> U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:    %iv.next = add i32 %iv, -1
@@ -22,7 +22,7 @@
 ; CHECK-NEXT:    %stop = load i32, i32* %load.addr, align 4
 ; CHECK-NEXT:    --> %stop U: full-set S: full-set Exits: <<Unknown>> LoopDispositions: { %loop: Variant }
 ; CHECK-NEXT:    %indvars.iv.next = add nsw i64 %indvars.iv, -1
-; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64)),+,-1}<%loop> U: [-4294967296,4294967295) S: [-4294967296,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
+; CHECK-NEXT:    --> {(-1 + (zext i32 %start to i64)),+,-1}<%loop> U: [-4294967296,4294967295) S: [-1,4294967295) Exits: <<Unknown>> LoopDispositions: { %loop: Computable }
 ; CHECK-NEXT:  Determining loop execution counts for: @test_01
 ; CHECK-NEXT:  Loop %loop: Unpredictable backedge-taken count.
 ; CHECK-NEXT:    exit count for loop: (zext i32 %start to i64)