Index: llvm/trunk/include/llvm/ADT/APInt.h =================================================================== --- llvm/trunk/include/llvm/ADT/APInt.h +++ llvm/trunk/include/llvm/ADT/APInt.h @@ -31,6 +31,7 @@ template class SmallVectorImpl; template class ArrayRef; +template class Optional; class APInt; @@ -2166,6 +2167,41 @@ /// Return A sign-divided by B, rounded by the given rounding mode. APInt RoundingSDiv(const APInt &A, const APInt &B, APInt::Rounding RM); +/// Let q(n) = An^2 + Bn + C, and BW = bit width of the value range +/// (e.g. 32 for i32). +/// This function finds the smallest number n, such that +/// (a) n >= 0 and q(n) = 0, or +/// (b) n >= 1 and q(n-1) and q(n), when evaluated in the set of all +/// integers, belong to two different intervals [Rk, Rk+R), +/// where R = 2^BW, and k is an integer. +/// The idea here is to find when q(n) "overflows" 2^BW, while at the +/// same time "allowing" subtraction. In unsigned modulo arithmetic a +/// subtraction (treated as addition of negated numbers) would always +/// count as an overflow, but here we want to allow values to decrease +/// and increase as long as they are within the same interval. +/// Specifically, adding of two negative numbers should not cause an +/// overflow (as long as the magnitude does not exceed the bith width). +/// On the other hand, given a positive number, adding a negative +/// number to it can give a negative result, which would cause the +/// value to go from [-2^BW, 0) to [0, 2^BW). In that sense, zero is +/// treated as a special case of an overflow. +/// +/// This function returns None if after finding k that minimizes the +/// positive solution to q(n) = kR, both solutions are contained between +/// two consecutive integers. +/// +/// There are cases where q(n) > T, and q(n+1) < T (assuming evaluation +/// in arithmetic modulo 2^BW, and treating the values as signed) by the +/// virtue of *signed* overflow. This function will *not* find such an n, +/// however it may find a value of n satisfying the inequalities due to +/// an *unsigned* overflow (if the values are treated as unsigned). +/// To find a solution for a signed overflow, treat it as a problem of +/// finding an unsigned overflow with a range with of BW-1. +/// +/// The returned value may have a different bit width from the input +/// coefficients. +Optional SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, + unsigned RangeWidth); } // End of APIntOps namespace // See friend declaration above. This additional declaration is required in Index: llvm/trunk/lib/Analysis/ScalarEvolution.cpp =================================================================== --- llvm/trunk/lib/Analysis/ScalarEvolution.cpp +++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp @@ -8344,69 +8344,273 @@ return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D); } -/// Find the roots of the quadratic equation for the given quadratic chrec -/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or -/// two SCEVCouldNotCompute objects. -static Optional> -SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { +/// For a given quadratic addrec, generate coefficients of the corresponding +/// quadratic equation, multiplied by a common value to ensure that they are +/// integers. +/// The returned value is a tuple { A, B, C, M, BitWidth }, where +/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C +/// were multiplied by, and BitWidth is the bit width of the original addrec +/// coefficients. +/// This function returns None if the addrec coefficients are not compile- +/// time constants. +static Optional> +GetQuadraticEquation(const SCEVAddRecExpr *AddRec) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); const SCEVConstant *MC = dyn_cast(AddRec->getOperand(1)); const SCEVConstant *NC = dyn_cast(AddRec->getOperand(2)); + LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: " + << *AddRec << '\n'); // We currently can only solve this if the coefficients are constants. - if (!LC || !MC || !NC) + if (!LC || !MC || !NC) { + LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n"); return None; + } - uint32_t BitWidth = LC->getAPInt().getBitWidth(); - const APInt &L = LC->getAPInt(); - const APInt &M = MC->getAPInt(); - const APInt &N = NC->getAPInt(); - APInt Two(BitWidth, 2); - - // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C - - // The A coefficient is N/2 - APInt A = N.sdiv(Two); - - // The B coefficient is M-N/2 - APInt B = M; - B -= A; // A is the same as N/2. - - // The C coefficient is L. - const APInt& C = L; - - // Compute the B^2-4ac term. - APInt SqrtTerm = B; - SqrtTerm *= B; - SqrtTerm -= 4 * (A * C); + APInt L = LC->getAPInt(); + APInt M = MC->getAPInt(); + APInt N = NC->getAPInt(); + assert(!N.isNullValue() && "This is not a quadratic addrec"); + + unsigned BitWidth = LC->getAPInt().getBitWidth(); + unsigned NewWidth = BitWidth + 1; + LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: " + << BitWidth << '\n'); + // The sign-extension (as opposed to a zero-extension) here matches the + // extension used in SolveQuadraticEquationWrap (with the same motivation). + N = N.sext(NewWidth); + M = M.sext(NewWidth); + L = L.sext(NewWidth); + + // The increments are M, M+N, M+2N, ..., so the accumulated values are + // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is, + // L+M, L+2M+N, L+3M+3N, ... + // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N. + // + // The equation Acc = 0 is then + // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0. + // In a quadratic form it becomes: + // N n^2 + (2M-N) n + 2L = 0. + + APInt A = N; + APInt B = 2 * M - A; + APInt C = 2 * L; + APInt T = APInt(NewWidth, 2); + LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B + << "x + " << C << ", coeff bw: " << NewWidth + << ", multiplied by " << T << '\n'); + return std::make_tuple(A, B, C, T, BitWidth); +} + +/// Helper function to compare optional APInts: +/// (a) if X and Y both exist, return min(X, Y), +/// (b) if neither X nor Y exist, return None, +/// (c) if exactly one of X and Y exists, return that value. +static Optional MinOptional(Optional X, Optional Y) { + if (X.hasValue() && Y.hasValue()) { + unsigned W = std::max(X->getBitWidth(), Y->getBitWidth()); + APInt XW = X->sextOrSelf(W); + APInt YW = Y->sextOrSelf(W); + return XW.slt(YW) ? *X : *Y; + } + if (!X.hasValue() && !Y.hasValue()) + return None; + return X.hasValue() ? *X : *Y; +} - if (SqrtTerm.isNegative()) { - // The loop is provably infinite. +/// Helper function to truncate an optional APInt to a given BitWidth. +/// When solving addrec-related equations, it is preferable to return a value +/// that has the same bit width as the original addrec's coefficients. If the +/// solution fits in the original bit width, truncate it (except for i1). +/// Returning a value of a different bit width may inhibit some optimizations. +/// +/// In general, a solution to a quadratic equation generated from an addrec +/// may require BW+1 bits, where BW is the bit width of the addrec's +/// coefficients. The reason is that the coefficients of the quadratic +/// equation are BW+1 bits wide (to avoid truncation when converting from +/// the addrec to the equation). +static Optional TruncIfPossible(Optional X, unsigned BitWidth) { + if (!X.hasValue()) return None; - } + unsigned W = X->getBitWidth(); + if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth)) + return X->trunc(BitWidth); + return X; +} + +/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n +/// iterations. The values L, M, N are assumed to be signed, and they +/// should all have the same bit widths. +/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW, +/// where BW is the bit width of the addrec's coefficients. +/// If the calculated value is a BW-bit integer (for BW > 1), it will be +/// returned as such, otherwise the bit width of the returned value may +/// be greater than BW. +/// +/// This function returns None if +/// (a) the addrec coefficients are not constant, or +/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases +/// like x^2 = 5, no integer solutions exist, in other cases an integer +/// solution may exist, but SolveQuadraticEquationWrap may fail to find it. +static Optional +SolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { + APInt A, B, C, M; + unsigned BitWidth; + auto T = GetQuadraticEquation(AddRec); + if (!T.hasValue()) + return None; + + std::tie(A, B, C, M, BitWidth) = *T; + LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); + Optional X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1); + if (!X.hasValue()) + return None; + + ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); + ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE); + if (!V->isZero()) + return None; + + return TruncIfPossible(X, BitWidth); +} - // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest - // integer value or else APInt::sqrt() will assert. - APInt SqrtVal = SqrtTerm.sqrt(); - - // Compute the two solutions for the quadratic formula. - // The divisions must be performed as signed divisions. - APInt NegB = -std::move(B); - APInt TwoA = std::move(A); - TwoA <<= 1; - if (TwoA.isNullValue()) +/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n +/// iterations. The values M, N are assumed to be signed, and they +/// should all have the same bit widths. +/// Find the least n such that c(n) does not belong to the given range, +/// while c(n-1) does. +/// +/// This function returns None if +/// (a) the addrec coefficients are not constant, or +/// (b) SolveQuadraticEquationWrap was unable to find a solution for the +/// bounds of the range. +static Optional +SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec, + const ConstantRange &Range, ScalarEvolution &SE) { + assert(AddRec->getOperand(0)->isZero() && + "Starting value of addrec should be 0"); + LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range " + << Range << ", addrec " << *AddRec << '\n'); + // This case is handled in getNumIterationsInRange. Here we can assume that + // we start in the range. + assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) && + "Addrec's initial value should be in range"); + + APInt A, B, C, M; + unsigned BitWidth; + auto T = GetQuadraticEquation(AddRec); + if (!T.hasValue()) return None; - LLVMContext &Context = SE.getContext(); + // Be careful about the return value: there can be two reasons for not + // returning an actual number. First, if no solutions to the equations + // were found, and second, if the solutions don't leave the given range. + // The first case means that the actual solution is "unknown", the second + // means that it's known, but not valid. If the solution is unknown, we + // cannot make any conclusions. + // Return a pair: the optional solution and a flag indicating if the + // solution was found. + auto SolveForBoundary = [&](APInt Bound) -> std::pair,bool> { + // Solve for signed overflow and unsigned overflow, pick the lower + // solution. + LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary " + << Bound << " (before multiplying by " << M << ")\n"); + Bound *= M; // The quadratic equation multiplier. + + Optional SO = None; + if (BitWidth > 1) { + LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " + "signed overflow\n"); + SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth); + } + LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for " + "unsigned overflow\n"); + Optional UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, + BitWidth+1); + + auto LeavesRange = [&] (const APInt &X) { + ConstantInt *C0 = ConstantInt::get(SE.getContext(), X); + ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE); + if (Range.contains(V0->getValue())) + return false; + // X should be at least 1, so X-1 is non-negative. + ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1); + ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE); + if (Range.contains(V1->getValue())) + return true; + return false; + }; + + // If SolveQuadraticEquationWrap returns None, it means that there can + // be a solution, but the function failed to find it. We cannot treat it + // as "no solution". + if (!SO.hasValue() || !UO.hasValue()) + return { None, false }; + + // Check the smaller value first to see if it leaves the range. + // At this point, both SO and UO must have values. + Optional Min = MinOptional(SO, UO); + if (LeavesRange(*Min)) + return { Min, true }; + Optional Max = Min == SO ? UO : SO; + if (LeavesRange(*Max)) + return { Max, true }; + + // Solutions were found, but were eliminated, hence the "true". + return { None, true }; + }; + + std::tie(A, B, C, M, BitWidth) = *T; + // Lower bound is inclusive, subtract 1 to represent the exiting value. + APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1; + APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth()); + auto SL = SolveForBoundary(Lower); + auto SU = SolveForBoundary(Upper); + // If any of the solutions was unknown, no meaninigful conclusions can + // be made. + if (!SL.second || !SU.second) + return None; - ConstantInt *Solution1 = - ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = - ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + // Claim: The correct solution is not some value between Min and Max. + // + // Justification: Assuming that Min and Max are different values, one of + // them is when the first signed overflow happens, the other is when the + // first unsigned overflow happens. Crossing the range boundary is only + // possible via an overflow (treating 0 as a special case of it, modeling + // an overflow as crossing k*2^W for some k). + // + // The interesting case here is when Min was eliminated as an invalid + // solution, but Max was not. The argument is that if there was another + // overflow between Min and Max, it would also have been eliminated if + // it was considered. + // + // For a given boundary, it is possible to have two overflows of the same + // type (signed/unsigned) without having the other type in between: this + // can happen when the vertex of the parabola is between the iterations + // corresponding to the overflows. This is only possible when the two + // overflows cross k*2^W for the same k. In such case, if the second one + // left the range (and was the first one to do so), the first overflow + // would have to enter the range, which would mean that either we had left + // the range before or that we started outside of it. Both of these cases + // are contradictions. + // + // Claim: In the case where SolveForBoundary returns None, the correct + // solution is not some value between the Max for this boundary and the + // Min of the other boundary. + // + // Justification: Assume that we had such Max_A and Min_B corresponding + // to range boundaries A and B and such that Max_A < Min_B. If there was + // a solution between Max_A and Min_B, it would have to be caused by an + // overflow corresponding to either A or B. It cannot correspond to B, + // since Min_B is the first occurrence of such an overflow. If it + // corresponded to A, it would have to be either a signed or an unsigned + // overflow that is larger than both eliminated overflows for A. But + // between the eliminated overflows and this overflow, the values would + // cover the entire value space, thus crossing the other boundary, which + // is a contradiction. - return std::make_pair(cast(SE.getConstant(Solution1)), - cast(SE.getConstant(Solution2))); + return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth); } ScalarEvolution::ExitLimit @@ -8441,23 +8645,12 @@ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { - if (auto Roots = SolveQuadraticEquation(AddRec, *this)) { - const SCEVConstant *R1 = Roots->first; - const SCEVConstant *R2 = Roots->second; - // Pick the smallest positive root value. - if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( - CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { - if (!CB->getZExtValue()) - std::swap(R1, R2); // R1 is the minimum root now. - - // We can only use this value if the chrec ends up with an exact zero - // value at this index. When solving for "X*X != 5", for example, we - // should not accept a root of 2. - const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); - if (Val->isZero()) - // We found a quadratic root! - return ExitLimit(R1, R1, false, Predicates); - } + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) { + const auto *R = cast(getConstant(S.getValue())); + return ExitLimit(R, R, false, Predicates); } return getCouldNotCompute(); } @@ -10565,52 +10758,11 @@ ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); - } else if (isQuadratic()) { - // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the - // quadratic equation to solve it. To do this, we must frame our problem in - // terms of figuring out when zero is crossed, instead of when - // Range.getUpper() is crossed. - SmallVector NewOps(op_begin(), op_end()); - NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper())); - const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap); - - // Next, solve the constructed addrec - if (auto Roots = - SolveQuadraticEquation(cast(NewAddRec), SE)) { - const SCEVConstant *R1 = Roots->first; - const SCEVConstant *R2 = Roots->second; - // Pick the smallest positive root value. - if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( - ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { - if (!CB->getZExtValue()) - std::swap(R1, R2); // R1 is the minimum root now. - - // Make sure the root is not off by one. The returned iteration should - // not be in the range, but the previous one should be. When solving - // for "X*X < 5", for example, we should not return a root of 2. - ConstantInt *R1Val = - EvaluateConstantChrecAtConstant(this, R1->getValue(), SE); - if (Range.contains(R1Val->getValue())) { - // The next iteration must be out of the range... - ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getAPInt() + 1); - - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (!Range.contains(R1Val->getValue())) - return SE.getConstant(NextVal); - return SE.getCouldNotCompute(); // Something strange happened - } - - // If R1 was not in the range, then it is a good return value. Make - // sure that R1-1 WAS in the range though, just in case. - ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getAPInt() - 1); - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (Range.contains(R1Val->getValue())) - return R1; - return SE.getCouldNotCompute(); // Something strange happened - } - } + } + + if (isQuadratic()) { + if (auto S = SolveQuadraticAddRecRange(this, Range, SE)) + return SE.getConstant(S.getValue()); } return SE.getCouldNotCompute(); Index: llvm/trunk/lib/Support/APInt.cpp =================================================================== --- llvm/trunk/lib/Support/APInt.cpp +++ llvm/trunk/lib/Support/APInt.cpp @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" #include "llvm/Config/llvm-config.h" @@ -2707,3 +2708,193 @@ } llvm_unreachable("Unknown APInt::Rounding enum"); } + +Optional +llvm::APIntOps::SolveQuadraticEquationWrap(APInt A, APInt B, APInt C, + unsigned RangeWidth) { + unsigned CoeffWidth = A.getBitWidth(); + assert(CoeffWidth == B.getBitWidth() && CoeffWidth == C.getBitWidth()); + assert(RangeWidth <= CoeffWidth && + "Value range width should be less than coefficient width"); + assert(RangeWidth > 1 && "Value range bit width should be > 1"); + + LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B + << "x + " << C << ", rw:" << RangeWidth << '\n'); + + // Identify 0 as a (non)solution immediately. + if (C.sextOrTrunc(RangeWidth).isNullValue() ) { + LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); + return APInt(CoeffWidth, 0); + } + + // The result of APInt arithmetic has the same bit width as the operands, + // so it can actually lose high bits. A product of two n-bit integers needs + // 2n-1 bits to represent the full value. + // The operation done below (on quadratic coefficients) that can produce + // the largest value is the evaluation of the equation during bisection, + // which needs 3 times the bitwidth of the coefficient, so the total number + // of required bits is 3n. + // + // The purpose of this extension is to simulate the set Z of all integers, + // where n+1 > n for all n in Z. In Z it makes sense to talk about positive + // and negative numbers (not so much in a modulo arithmetic). The method + // used to solve the equation is based on the standard formula for real + // numbers, and uses the concepts of "positive" and "negative" with their + // usual meanings. + CoeffWidth *= 3; + A = A.sext(CoeffWidth); + B = B.sext(CoeffWidth); + C = C.sext(CoeffWidth); + + // Make A > 0 for simplicity. Negate cannot overflow at this point because + // the bit width has increased. + if (A.isNegative()) { + A.negate(); + B.negate(); + C.negate(); + } + + // Solving an equation q(x) = 0 with coefficients in modular arithmetic + // is really solving a set of equations q(x) = kR for k = 0, 1, 2, ..., + // and R = 2^BitWidth. + // Since we're trying not only to find exact solutions, but also values + // that "wrap around", such a set will always have a solution, i.e. an x + // that satisfies at least one of the equations, or such that |q(x)| + // exceeds kR, while |q(x-1)| for the same k does not. + // + // We need to find a value k, such that Ax^2 + Bx + C = kR will have a + // positive solution n (in the above sense), and also such that the n + // will be the least among all solutions corresponding to k = 0, 1, ... + // (more precisely, the least element in the set + // { n(k) | k is such that a solution n(k) exists }). + // + // Consider the parabola (over real numbers) that corresponds to the + // quadratic equation. Since A > 0, the arms of the parabola will point + // up. Picking different values of k will shift it up and down by R. + // + // We want to shift the parabola in such a way as to reduce the problem + // of solving q(x) = kR to solving shifted_q(x) = 0. + // (The interesting solutions are the ceilings of the real number + // solutions.) + APInt R = APInt::getOneBitSet(CoeffWidth, RangeWidth); + APInt TwoA = 2 * A; + APInt SqrB = B * B; + bool PickLow; + + auto RoundUp = [] (const APInt &V, const APInt &A) { + assert(A.isStrictlyPositive()); + APInt T = V.abs().urem(A); + if (T.isNullValue()) + return V; + return V.isNegative() ? V+T : V+(A-T); + }; + + // The vertex of the parabola is at -B/2A, but since A > 0, it's negative + // iff B is positive. + if (B.isNonNegative()) { + // If B >= 0, the vertex it at a negative location (or at 0), so in + // order to have a non-negative solution we need to pick k that makes + // C-kR negative. To satisfy all the requirements for the solution + // that we are looking for, it needs to be closest to 0 of all k. + C = C.srem(R); + if (C.isStrictlyPositive()) + C -= R; + // Pick the greater solution. + PickLow = false; + } else { + // If B < 0, the vertex is at a positive location. For any solution + // to exist, the discriminant must be non-negative. This means that + // C-kR <= B^2/4A is a necessary condition for k, i.e. there is a + // lower bound on values of k: kR >= C - B^2/4A. + APInt LowkR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. + // Round LowkR up (towards +inf) to the nearest kR. + LowkR = RoundUp(LowkR, R); + + // If there exists k meeting the condition above, and such that + // C-kR > 0, there will be two positive real number solutions of + // q(x) = kR. Out of all such values of k, pick the one that makes + // C-kR closest to 0, (i.e. pick maximum k such that C-kR > 0). + // In other words, find maximum k such that LowkR <= kR < C. + if (C.sgt(LowkR)) { + // If LowkR < C, then such a k is guaranteed to exist because + // LowkR itself is a multiple of R. + C -= -RoundUp(-C, R); // C = C - RoundDown(C, R) + // Pick the smaller solution. + PickLow = true; + } else { + // If C-kR < 0 for all potential k's, it means that one solution + // will be negative, while the other will be positive. The positive + // solution will shift towards 0 if the parabola is moved up. + // Pick the kR closest to the lower bound (i.e. make C-kR closest + // to 0, or in other words, out of all parabolas that have solutions, + // pick the one that is the farthest "up"). + // Since LowkR is itself a multiple of R, simply take C-LowkR. + C -= LowkR; + // Pick the greater solution. + PickLow = false; + } + } + + LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " + << B << "x + " << C << ", rw:" << RangeWidth << '\n'); + + APInt D = SqrB - 4*A*C; + assert(D.isNonNegative() && "Negative discriminant"); + APInt SQ = D.sqrt(); + + APInt Q = SQ * SQ; + bool InexactSQ = Q != D; + // The calculated SQ may actually be greater than the exact (non-integer) + // value. If that's the case, decremement SQ to get a value that is lower. + if (Q.sgt(D)) + SQ -= 1; + + APInt X; + APInt Rem; + + // SQ is rounded down (i.e SQ * SQ <= D), so the roots may be inexact. + // When using the quadratic formula directly, the calculated low root + // may be greater than the exact one, since we would be subtracting SQ. + // To make sure that the calculated root is not greater than the exact + // one, subtract SQ+1 when calculating the low root (for inexact value + // of SQ). + if (PickLow) + APInt::sdivrem(-B - (SQ+InexactSQ), TwoA, X, Rem); + else + APInt::sdivrem(-B + SQ, TwoA, X, Rem); + + // The updated coefficients should be such that the (exact) solution is + // positive. Since APInt division rounds towards 0, the calculated one + // can be 0, but cannot be negative. + assert(X.isNonNegative() && "Solution should be non-negative"); + + if (!InexactSQ && Rem.isNullValue()) { + LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); + return X; + } + + assert((SQ*SQ).sle(D) && "SQ = |_sqrt(D)_|, so SQ*SQ <= D"); + // The exact value of the square root of D should be between SQ and SQ+1. + // This implies that the solution should be between that corresponding to + // SQ (i.e. X) and that corresponding to SQ+1. + // + // The calculated X cannot be greater than the exact (real) solution. + // Actually it must be strictly less than the exact solution, while + // X+1 will be greater than or equal to it. + + APInt VX = (A*X + B)*X + C; + APInt VY = VX + TwoA*X + A + B; + bool SignChange = VX.isNegative() != VY.isNegative() || + VX.isNullValue() != VY.isNullValue(); + // If the sign did not change between X and X+1, X is not a valid solution. + // This could happen when the actual (exact) roots don't have an integer + // between them, so they would both be contained between X and X+1. + if (!SignChange) { + LLVM_DEBUG(dbgs() << __func__ << ": no valid solution\n"); + return None; + } + + X += 1; + LLVM_DEBUG(dbgs() << __func__ << ": solution (wrap): " << X << '\n'); + return X; +} Index: llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-i1.ll @@ -0,0 +1,100 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f0': +; CHECK-NEXT: Classifying expressions for: @f0 +; CHECK-NEXT: %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ] +; CHECK-NEXT: --> {2,+,1}<%b1> U: [2,4) S: [2,4) Exits: 3 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ] +; CHECK-NEXT: --> {1,+,2,+,1}<%b1> U: full-set S: full-set Exits: 3 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v2 = add nsw i16 %v0, 1 +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,5) S: [3,5) Exits: 4 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v3 = add nsw i16 %v1, %v0 +; CHECK-NEXT: --> {3,+,3,+,1}<%b1> U: full-set S: full-set Exits: 6 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v4 = and i16 %v3, 1 +; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: Determining loop execution counts for: @f0 +; CHECK-NEXT: Loop %b1: backedge-taken count is 1 +; CHECK-NEXT: Loop %b1: max backedge-taken count is 1 +; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 1 +; CHECK-NEXT: Predicates: +; CHECK-EMPTY: +; CHECK-NEXT: Loop %b1: Trip multiple is 2 +define void @f0() { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ] + %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ] + %v2 = add nsw i16 %v0, 1 + %v3 = add nsw i16 %v1, %v0 + %v4 = and i16 %v3, 1 + %v5 = icmp ne i16 %v4, 0 + br i1 %v5, label %b1, label %b2 + +b2: ; preds = %b1 + ret void +} + +@g0 = common dso_local global i16 0, align 2 +@g1 = common dso_local global i32 0, align 4 +@g2 = common dso_local global i32* null, align 8 + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f1': +; CHECK-NEXT: Classifying expressions for: @f1 +; CHECK-NEXT: %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ] +; CHECK-NEXT: --> {0,+,3,+,1}<%b1> U: full-set S: full-set Exits: 7 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ] +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v2 = trunc i32 %v1 to i16 +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v3 = add i16 %v0, %v2 +; CHECK-NEXT: --> {3,+,4,+,1}<%b1> U: full-set S: full-set Exits: 12 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v4 = and i16 %v3, 1 +; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v6 = add nuw nsw i32 %v1, 1 +; CHECK-NEXT: --> {4,+,1}<%b1> U: [4,7) S: [4,7) Exits: 6 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v7 = phi i32 [ %v1, %b1 ] +; CHECK-NEXT: --> %v7 U: [3,6) S: [3,6) +; CHECK-NEXT: %v8 = phi i16 [ %v3, %b1 ] +; CHECK-NEXT: --> %v8 U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @f1 +; CHECK-NEXT: Loop %b3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %b3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %b3: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %b1: backedge-taken count is 2 +; CHECK-NEXT: Loop %b1: max backedge-taken count is 2 +; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 2 +; CHECK-NEXT: Predicates: +; CHECK-EMPTY: +; CHECK-NEXT: Loop %b1: Trip multiple is 3 +define void @f1() #0 { +b0: + store i16 0, i16* @g0, align 2 + store i32* @g1, i32** @g2, align 8 + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ] + %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ] + %v2 = trunc i32 %v1 to i16 + %v3 = add i16 %v0, %v2 + %v4 = and i16 %v3, 1 + %v5 = icmp eq i16 %v4, 0 + %v6 = add nuw nsw i32 %v1, 1 + br i1 %v5, label %b2, label %b1 + +b2: ; preds = %b1 + %v7 = phi i32 [ %v1, %b1 ] + %v8 = phi i16 [ %v3, %b1 ] + store i32 %v7, i32* @g1, align 4 + store i16 %v8, i16* @g0, align 2 + br label %b3 + +b3: ; preds = %b3, %b2 + br label %b3 +} + +attributes #0 = { nounwind uwtable "target-cpu"="x86-64" } Index: llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll @@ -0,0 +1,51 @@ +; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s + +; The exit value from this loop was originally calculated as 0. +; The actual exit condition is 256*256 == 0 (in i16). + +; CHECK: Printing analysis 'Scalar Evolution Analysis' for function 'f0': +; CHECK-NEXT: Classifying expressions for: @f0 +; CHECK-NEXT: %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ] +; CHECK-NEXT: --> {0,+,-1}<%b1> U: [-255,1) S: [-255,1) Exits: -255 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v2 = add i16 %v1, -1 +; CHECK-NEXT: --> {-1,+,-1}<%b1> U: [-256,0) S: [-256,0) Exits: -256 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v3 = mul i16 %v2, %v2 +; CHECK-NEXT: --> {1,+,3,+,2}<%b1> U: full-set S: full-set Exits: 0 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v5 = phi i16 [ %v2, %b1 ] +; CHECK-NEXT: --> %v5 U: [-256,0) S: [-256,0) +; CHECK-NEXT: %v6 = phi i16 [ %v3, %b1 ] +; CHECK-NEXT: --> %v6 U: full-set S: full-set +; CHECK-NEXT: %v7 = sext i16 %v5 to i32 +; CHECK-NEXT: --> (sext i16 %v5 to i32) U: [-256,0) S: [-256,0) +; CHECK-NEXT: Determining loop execution counts for: @f0 +; CHECK-NEXT: Loop %b1: backedge-taken count is 255 +; CHECK-NEXT: Loop %b1: max backedge-taken count is 255 +; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 255 +; CHECK-NEXT: Predicates: +; CHECK-EMPTY: +; CHECK-NEXT: Loop %b1: Trip multiple is 256 + + +@g0 = global i32 0, align 4 +@g1 = global i16 0, align 2 + +define signext i32 @f0() { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ] + %v2 = add i16 %v1, -1 + %v3 = mul i16 %v2, %v2 + %v4 = icmp eq i16 %v3, 0 + br i1 %v4, label %b2, label %b1 + +b2: ; preds = %b1 + %v5 = phi i16 [ %v2, %b1 ] + %v6 = phi i16 [ %v3, %b1 ] + %v7 = sext i16 %v5 to i32 + store i32 %v7, i32* @g0, align 4 + store i16 %v6, i16* @g1, align 2 + ret i32 0 +} + Index: llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic.ll =================================================================== --- llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic.ll +++ llvm/trunk/test/Analysis/ScalarEvolution/solve-quadratic.ll @@ -0,0 +1,364 @@ +; RUN: opt -analyze -scalar-evolution -S -debug-only=scalar-evolution,apint < %s 2>&1 | FileCheck %s +; REQUIRES: asserts + +; Use the following template to get a chrec {L,+,M,+,N}. +; +; define signext i32 @func() { +; entry: +; br label %loop +; +; loop: +; %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] +; %inc = phi i32 [ X, %entry ], [ %inc1, %loop ] +; %acc = phi i32 [ Y, %entry ], [ %acc1, %loop ] +; %ivr1 = add i32 %ivr, %inc +; %inc1 = add i32 %inc, Z ; M = inc1 = inc + N = X + N +; %acc1 = add i32 %acc, %inc ; L = acc1 = X + Y +; %and = and i32 %acc1, 2^W-1 ; iW +; %cond = icmp eq i32 %and, 0 +; br i1 %cond, label %exit, label %loop +; +; exit: +; %rv = phi i32 [ %acc1, %loop ] +; ret i32 %rv +; } +; +; From +; X + Y = L +; X + Z = M +; Z = N +; get +; X = M - N +; Y = N - M + L +; Z = N + +; The connection between the chrec coefficients {L,+,M,+,N} and the quadratic +; coefficients is that the quadratic equation is N x^2 + (2M-N) x + 2L = 0, +; where the equation was multiplied by 2 to make the coefficient at x^2 an +; integer (the actual equation is N/2 x^2 + (M-N/2) x + L = 0). + +; Quadratic equation: 2x^2 + 2x + 4 in i4, solution (wrap): 4 +; {14,+,14,+,14} -> X=0, Y=14, Z=14 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test01' +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {-2,+,-2,+,-2}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 4 +; CHECK: GetQuadraticEquation: equation -2x^2 + -2x + -4, coeff bw: 5, multiplied by 2 +; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving -2x^2 + -2x + -4, rw:5 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + 2x + -28, rw:5 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 4 +; CHECK: Loop %loop: Unpredictable backedge-taken count +define signext i32 @test01() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ 0, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 14, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 14 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, 15 + %cond = icmp eq i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation: 1x^2 + -73x + -146 in i32, solution (wrap): 75 +; {-72,+,-36,+,1} -> X=-37, Y=-35, Z=1 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test02': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-36,+,1}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 32 +; CHECK: GetQuadraticEquation: equation 1x^2 + -73x + 0, coeff bw: 33, multiplied by 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -142, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + 4294967154, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -4294967438, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65573 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -73x + -146, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -73x + -146, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 75 +; CHECK: Loop %loop: backedge-taken count is 75 +define signext i32 @test02() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -37, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ -35, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 1 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, -1 + %cond = icmp sgt i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation: 2x^2 - 4x + 34 in i4, solution (exact): 1. +; {17,+,-1,+,2} -> X=-3, Y=20, Z=2 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test03': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,-1,+,2}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 4 +; CHECK: GetQuadraticEquation: equation 2x^2 + -4x + 2, coeff bw: 5, multiplied by 2 +; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 2x^2 + -4x + 2, rw:5 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 2x^2 + -4x + 2, rw:5 +; CHECK: SolveQuadraticEquationWrap: solution (root): 1 +; CHECK: Loop %loop: backedge-taken count is 1 +define signext i32 @test03() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -3, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 20, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 2 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, 15 + %cond = icmp eq i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation 4x^2 + 2x + 2 in i16, solution (wrap): 181 +; {1,+,3,+,4} -> X=-1, Y=2, Z=4 (i16) +; +; This is an example where the returned solution is the first time an +; unsigned wrap occurs, whereas the actual exit condition occurs much +; later. The number of iterations returned by SolveQuadraticEquation +; is 181, but the loop will iterate 37174 times. +; +; Here is a C code that corresponds to this case that calculates the number +; of iterations: +; +; int test04() { +; int c = 0; +; int ivr = 0; +; int inc = -1; +; int acc = 2; +; +; while (acc & 0xffff) { +; c++; +; ivr += inc; +; inc += 4; +; acc += inc; +; } +; +; return c; +; } +; + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test04': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,3,+,4}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 16 +; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 0, coeff bw: 17, multiplied by 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:16 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -65534, rw:16 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 128 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181 +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {1,+,3,+,4}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 16 +; CHECK: GetQuadraticEquation: equation 4x^2 + 2x + 2, coeff bw: 17, multiplied by 2 +; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 4x^2 + 2x + 2, rw:17 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 4x^2 + 2x + -131070, rw:17 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 181 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +define signext i32 @test04() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -1, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 2, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 4 + %acc1 = add i32 %acc, %inc + %and = trunc i32 %acc1 to i16 + %cond = icmp eq i16 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; A case with signed arithmetic, but unsigned comparison. + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test05': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-1,+,-1}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 32 +; CHECK: GetQuadraticEquation: equation -1x^2 + -1x + 0, coeff bw: 33, multiplied by 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + 4, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -4294967294, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 65536 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving -1x^2 + -1x + -2, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + 1x + -8589934590, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 92682 +; CHECK: Loop %loop: backedge-taken count is 2 + +define signext i32 @test05() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ 0, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ -1, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, -1 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, -1 + %cond = icmp ule i32 %and, -3 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; A test that used to crash with one of the earlier versions of the code. + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test06': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,-99999,+,1}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 32 +; CHECK: GetQuadraticEquation: equation 1x^2 + -199999x + 0, coeff bw: 33, multiplied by 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 2, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 1 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -4294967294, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967298, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 4294967284, rw:32 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 24469 +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 1x^2 + -199999x + -12, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 1x^2 + -199999x + 8589934580, rw:33 +; CHECK: SolveQuadraticEquationWrap: solution (wrap): 62450 +; CHECK: Loop %loop: backedge-taken count is 24469 +define signext i32 @test06() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -100000, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 100000, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 1 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, -1 + %cond = icmp sgt i32 %and, 5 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; The equation +; 532052752x^2 + -450429774x + 71188414 = 0 +; has two exact solutions (up to two decimal digits): 0.21 and 0.64. +; Since there is no integer between them, there is no integer n that either +; solves the equation exactly, or changes the sign of it between n and n+1. + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test07': +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {0,+,40811489,+,532052752}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 32 +; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 0, coeff bw: 33, multiplied by 2 +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32 +; CHECK: SolveQuadraticEquationWrap: no valid solution +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: no valid solution +; CHECK: SolveQuadraticAddRecRange: solving for signed overflow +; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:32 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:32 +; CHECK: SolveQuadraticEquationWrap: no valid solution +; CHECK: SolveQuadraticAddRecRange: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: no valid solution +; CHECK: GetQuadraticEquation: analyzing quadratic addrec: {35594207,+,40811489,+,532052752}<%loop> +; CHECK: GetQuadraticEquation: addrec coeff bw: 32 +; CHECK: GetQuadraticEquation: equation 532052752x^2 + -450429774x + 71188414, coeff bw: 33, multiplied by 2 +; CHECK: SolveQuadraticAddRecExact: solving for unsigned overflow +; CHECK: SolveQuadraticEquationWrap: solving 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: updated coefficients 532052752x^2 + -450429774x + 71188414, rw:33 +; CHECK: SolveQuadraticEquationWrap: no valid solution +; CHECK: Loop %loop: Unpredictable backedge-taken count. +define signext i32 @test07() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -491241263, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 526835470, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 532052752 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, -1 + %cond = icmp eq i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + Index: llvm/trunk/unittests/ADT/APIntTest.cpp =================================================================== --- llvm/trunk/unittests/ADT/APIntTest.cpp +++ llvm/trunk/unittests/ADT/APIntTest.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "gtest/gtest.h" #include @@ -2357,4 +2358,89 @@ } } +TEST(APIntTest, SolveQuadraticEquationWrap) { + // Verify that "Solution" is the first non-negative integer that solves + // Ax^2 + Bx + C = "0 or overflow", i.e. that it is a correct solution + // as calculated by SolveQuadraticEquationWrap. + auto Validate = [] (int A, int B, int C, unsigned Width, int Solution) { + int Mask = (1 << Width) - 1; + + // Solution should be non-negative. + EXPECT_GE(Solution, 0); + + auto OverflowBits = [] (int64_t V, unsigned W) { + return V & -(1 << W); + }; + + int64_t Over0 = OverflowBits(C, Width); + + auto IsZeroOrOverflow = [&] (int X) { + int64_t ValueAtX = A*X*X + B*X + C; + int64_t OverX = OverflowBits(ValueAtX, Width); + return (ValueAtX & Mask) == 0 || OverX != Over0; + }; + + auto EquationToString = [&] (const char *X_str) { + return Twine(A) + Twine(X_str) + Twine("^2 + ") + Twine(B) + + Twine(X_str) + Twine(" + ") + Twine(C) + Twine(", bitwidth: ") + + Twine(Width); + }; + + auto IsSolution = [&] (const char *X_str, int X) { + if (IsZeroOrOverflow(X)) + return ::testing::AssertionSuccess() + << X << " is a solution of " << EquationToString(X_str); + return ::testing::AssertionFailure() + << X << " is not an expected solution of " + << EquationToString(X_str); + }; + + auto IsNotSolution = [&] (const char *X_str, int X) { + if (!IsZeroOrOverflow(X)) + return ::testing::AssertionSuccess() + << X << " is not a solution of " << EquationToString(X_str); + return ::testing::AssertionFailure() + << X << " is an unexpected solution of " + << EquationToString(X_str); + }; + + // This is the important part: make sure that there is no solution that + // is less than the calculated one. + if (Solution > 0) { + for (int X = 1; X < Solution-1; ++X) + EXPECT_PRED_FORMAT1(IsNotSolution, X); + } + + // Verify that the calculated solution is indeed a solution. + EXPECT_PRED_FORMAT1(IsSolution, Solution); + }; + + // Generate all possible quadratic equations with Width-bit wide integer + // coefficients, get the solution from SolveQuadraticEquationWrap, and + // verify that the solution is correct. + auto Iterate = [&] (unsigned Width) { + assert(1 < Width && Width < 32); + int Low = -(1 << (Width-1)); + int High = (1 << (Width-1)); + + for (int A = Low; A != High; ++A) { + if (A == 0) + continue; + for (int B = Low; B != High; ++B) { + for (int C = Low; C != High; ++C) { + Optional S = APIntOps::SolveQuadraticEquationWrap( + APInt(Width, A), APInt(Width, B), + APInt(Width, C), Width); + if (S.hasValue()) + Validate(A, B, C, Width, S->getSExtValue()); + } + } + } + }; + + // Test all widths in [2..6]. + for (unsigned i = 2; i <= 6; ++i) + Iterate(i); +} + } // end anonymous namespace