Index: llvm/include/llvm/Analysis/ScalarEvolution.h =================================================================== --- llvm/include/llvm/Analysis/ScalarEvolution.h +++ llvm/include/llvm/Analysis/ScalarEvolution.h @@ -1561,6 +1561,12 @@ ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop, const SCEV *MaxBECount, unsigned BitWidth); + /// If the unknown expression U corresponds to a simple recurrence, return + /// a constant range which represents the entire recurrence. Note that + /// *add* recurrences aren't represented by SCEVUnknowns and thus don't use + /// this mechanism. + ConstantRange getRangeForUnknownRecurrence(const SCEVUnknown *U); + /// We know that there is no SCEV for the specified value. Analyze the /// expression. const SCEV *createSCEV(Value *V); Index: llvm/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/lib/Analysis/ScalarEvolution.cpp +++ llvm/lib/Analysis/ScalarEvolution.cpp @@ -5645,6 +5645,71 @@ } } +ConstantRange ScalarEvolution:: +getRangeForUnknownRecurrence(const SCEVUnknown *U) { + const DataLayout &DL = getDataLayout(); + + unsigned BitWidth = getTypeSizeInBits(U->getType()); + ConstantRange CR(BitWidth, /*isFullSet=*/true); + + // Match a simple recurrence of the form: , and then + // use information about the trip count to improve our available range. Note + // that the trip count independent cases are already handled by known bits. + auto *P = dyn_cast(U->getValue()); + if (!P) + return CR; + + BinaryOperator *BO; + Value *Start, *Step; + if (!matchSimpleRecurrence(P, BO, Start, Step)) + return CR; + + // If we found a recurrence, we must be in a loop -- unless we're + // in unreachable code where dominance collapses. Note that BO might + // be in some subloop of L, and that's completely okay. + auto *L = LI.getLoopFor(P->getParent()); + if (!L) + return CR; + assert(L->getHeader() == P->getParent()); + if (!L->contains(BO->getParent())) + // NOTE: This bailout should be an assert instead. However, asserting + // the condition here exposes a case where LoopFusion is querying SCEV + // with malformed loop information during the midst of the transform. + // There doesn't appear to be an obvious fix, so for the moment bailout + // until the caller issue can be fixed. PR49566 tracks the bug. + return CR; + + // TODO: Handle ashr and lshr cases to increase minimum value reported + if (BO->getOpcode() != Instruction::Shl || BO->getOperand(0) != P) + return CR; + + unsigned TC = getSmallConstantMaxTripCount(L); + if (!TC || TC >= BitWidth) + return CR; + + auto KnownStart = computeKnownBits(Start, DL, 0, &AC, nullptr, &DT); + auto KnownStep = computeKnownBits(Step, DL, 0, &AC, nullptr, &DT); + assert(KnownStart.getBitWidth() == BitWidth && + KnownStep.getBitWidth() == BitWidth); + + // Compute total shift amount, being careful of overflow and bitwidths. + auto MaxShiftAmt = KnownStep.getMaxValue(); + bool Overflow = false; + auto TotalShift = MaxShiftAmt.umul_ov(APInt(BitWidth, TC-1, false), Overflow); + if (Overflow) + return CR; + + // Iff no bits are shifted out, value increases on every shift. + auto KnownEnd = KnownBits::shl(KnownStart, + KnownBits::makeConstant(TotalShift)); + if (TotalShift.ult(KnownStart.countMinLeadingZeros())) + CR = CR.intersectWith(ConstantRange(KnownStart.getMinValue(), + KnownEnd.getMaxValue() + 1)); + return CR; +} + + + /// Determine the range for a particular SCEV. If SignHint is /// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges /// with a "cleaner" unsigned (resp. signed) representation. @@ -5845,12 +5910,19 @@ } if (const SCEVUnknown *U = dyn_cast(S)) { + // Check if the IR explicitly contains !range metadata. Optional MDRange = GetRangeFromMetadata(U->getValue()); if (MDRange.hasValue()) ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), RangeType); + // Use facts about recurrences in the underlying IR. Note that add + // recurrences are AddRecExprs and thus don't hit this path. This + // primarily handles shift recurrences. + auto CR = getRangeForUnknownRecurrence(U); + ConservativeResult = ConservativeResult.intersectWith(CR); + // See if ValueTracking can give us a useful range. const DataLayout &DL = getDataLayout(); KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); Index: llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll =================================================================== --- llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll +++ llvm/test/Analysis/ScalarEvolution/shift-recurrences.ll @@ -193,11 +193,11 @@ ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,5) S: [0,5) Exits: 4 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl = phi i64 [ 4, %entry ], [ %iv.shl.next, %loop ] -; CHECK-NEXT: --> %iv.shl U: [0,-3) S: [-9223372036854775808,9223372036854775801) Exits: 64 LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %iv.shl U: [4,65) S: [4,65) Exits: 64 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,6) S: [1,6) Exits: 5 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl.next = shl i64 %iv.shl, 1 -; CHECK-NEXT: --> (2 * %iv.shl) U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: 128 LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> (2 * %iv.shl) U: [8,129) S: [8,129) Exits: 128 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @test_shl2 ; CHECK-NEXT: Loop %loop: backedge-taken count is 4 ; CHECK-NEXT: Loop %loop: max backedge-taken count is 4 @@ -227,7 +227,7 @@ ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,5) S: [0,5) Exits: 4 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl = phi i64 [ 4, %entry ], [ %iv.shl.next, %loop ] -; CHECK-NEXT: --> %iv.shl U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: <> LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %iv.shl U: [4,65) S: [4,65) Exits: <> LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,6) S: [1,6) Exits: 5 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl.next = shl i64 %iv.shl, %shiftamt @@ -260,11 +260,11 @@ ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,61) S: [0,61) Exits: 60 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl = phi i64 [ 4, %entry ], [ %iv.shl.next, %loop ] -; CHECK-NEXT: --> %iv.shl U: [0,-3) S: [-9223372036854775808,9223372036854775801) Exits: 4611686018427387904 LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %iv.shl U: [4,4611686018427387905) S: [4,4611686018427387905) Exits: 4611686018427387904 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,62) S: [1,62) Exits: 61 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl.next = shl i64 %iv.shl, 1 -; CHECK-NEXT: --> (2 * %iv.shl) U: [0,-7) S: [-9223372036854775808,9223372036854775801) Exits: -9223372036854775808 LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> (2 * %iv.shl) U: [8,-9223372036854775807) S: [-9223372036854775808,9223372036854775801) Exits: -9223372036854775808 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: Determining loop execution counts for: @test_shl4 ; CHECK-NEXT: Loop %loop: backedge-taken count is 60 ; CHECK-NEXT: Loop %loop: max backedge-taken count is 60 @@ -324,7 +324,7 @@ ; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {0,+,1}<%loop> U: [0,5) S: [0,5) Exits: 4 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.shl = phi i64 [ 4, %entry ], [ %iv.shl.next, %loop ] -; CHECK-NEXT: --> %iv.shl U: [0,-3) S: [-9223372036854775808,9223372036854775805) Exits: 16 LoopDispositions: { %loop: Variant } +; CHECK-NEXT: --> %iv.shl U: [4,65) S: [4,65) Exits: 16 LoopDispositions: { %loop: Variant } ; CHECK-NEXT: %iv.next = add i64 %iv, 1 ; CHECK-NEXT: --> {1,+,1}<%loop> U: [1,6) S: [1,6) Exits: 5 LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %shiftamt = and i64 %iv, 1