Index: lib/Analysis/ScalarEvolution.cpp =================================================================== --- lib/Analysis/ScalarEvolution.cpp +++ lib/Analysis/ScalarEvolution.cpp @@ -8252,10 +8252,9 @@ return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D); } -/// Find the roots of the quadratic equation for the given quadratic chrec -/// {L,+,M,+,N}. This returns either the two roots (which might be the same) or -/// two SCEVCouldNotCompute objects. -static Optional> +/// Find the smallest non-negative number n, such that after n iterations +/// the quadratic chrec {L,+,M,+,N} equals 0 or changes sign. +static Optional SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!"); const SCEVConstant *LC = dyn_cast(AddRec->getOperand(0)); @@ -8267,54 +8266,198 @@ return None; uint32_t BitWidth = LC->getAPInt().getBitWidth(); - const APInt &L = LC->getAPInt(); - const APInt &M = MC->getAPInt(); - const APInt &N = NC->getAPInt(); - APInt Two(BitWidth, 2); + APInt L = LC->getAPInt(); + APInt M = MC->getAPInt(); + APInt N = NC->getAPInt(); + assert(!N.isNullValue() && "This is not a quadratic chrec"); + + // The result of APInt arithmetic has the same bit width as the operands, + // so it can actually lose high bits. A product of two n-bit integers needs + // 2n-1 bits to represent the full value. The conversion used here from + // n-bit chrec coefficients produces n+2-bit quadratic coefficients. + // The operation done below (on quadratic coefficients) that can produce + // the largest value is the evaluation of the equation during bisection, + // which needs 3 times the bitwidth of the coefficient. + // The total number of required bits is 3(n+2) = 3n+6. + uint32_t NewWidth = 3*BitWidth + 6; + if (BitWidth == 1) { + // In i1, 1 is the same as -1, so sign-extending (i1 1) would actually + // produce -1 in the larger type. + N = N.zext(NewWidth); + M = M.zext(NewWidth); + L = L.zext(NewWidth); + } else { + // At the same time, coefficients that are not in i1 should have their + // sign preserved. + N = N.sext(NewWidth); + M = M.sext(NewWidth); + L = L.sext(NewWidth); + } + + // It is preferable to return a value that has the same bit width as the + // AddRec's coefficients. If the solution fits in the original bit width, + // truncate it (except for i1). + auto TruncToOrig = [BitWidth,&SE] (const APInt &V) { + if (BitWidth > 1 && V.isIntN(BitWidth)) + return cast(SE.getConstant(V.trunc(BitWidth))); + return cast(SE.getConstant(V)); + }; - // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C + // The increments are M, M+N, M+2N, ..., so the accumulated values are + // L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is, + // L+M, L+2M+N, L+3M+3N, ... + // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N. + // + // The equation Acc = 0 is then + // L + nM + n(n-1)/2 N = 0, or 2L + 2M n + n(n-1) N = 0. + // In a quadratic form it becomes: + // N n^2 + (2M-N) n + 2L = 0. + + APInt A = N; + APInt B = 2 * M - A; + APInt C = 2 * L; + APInt R = APInt::getOneBitSet(NewWidth, BitWidth); + + // Make A > 0 for simplicity. + if (A.isNegative()) { + A.negate(); + B.negate(); + C.negate(); + } + LLVM_DEBUG(dbgs() << __func__ << ": solving " << A << "x^2 + " << B + << "x + " << C << ", bw:" << BitWidth << " for AddRec: " + << *AddRec << '\n'); + + // Identify 0 as a (non)solution immediately. + if (C.isNullValue()) { + LLVM_DEBUG(dbgs() << __func__ << ": zero solution\n"); + return TruncToOrig(APInt(BitWidth, 0)); + } + + // Solving an equation q(x) = 0 with coefficients in modular arithmetic + // is really solving a set of equations q(x) = 2kR for k = 0, 1, 2, ..., + // and R = 2^BitWidth. (It's 2R because the equation was multiplied by 2.) + // Since we're trying not only to find exact solutions, but also values + // that "wrap around", such a set will always have a solution, i.e. an x + // that satisfies at least one of the equations, or such that q(x) has + // a "different sign" than q(x-1). + // + // We need to find a value k, such that Ax^2 + Bx + C = 2kR will have a + // positive solution n (in the above sense), and also such that the n + // will be the least among all k = 0, 1, ... + // + // Consider the parabola (over real numbers) that corresponds to the + // quadratic equation. Since A > 0, the arms of the parabola will point + // up. Picking different values of k will shift it up and down by 2R. + // The interesting solutions are the ceilings of the real number solutions. + // + // The vertex of the parabola is at -B/2A, but since A > 0, it's negative + // iff B is positive. + + APInt TwoA = 2 * A; + APInt TwoR = 2 * R; + APInt SqrB = B * B; + bool PickLow; + + if (B.isNonNegative()) { + // If B >= 0, the vertex it at a negative location (or at 0), so in + // order to have a non-negative solution we need to pick k that makes + // C-2kR negative. To satisfy all the requirements for the solution + // that we are looking for, it needs to be closest to 0 of all k. + C = C.srem(TwoR); + if (C.isStrictlyPositive()) + C -= TwoR; + // Pick the greater solution. + PickLow = false; + } else { + // If B < 0, the vertex is at a positive location. For any solution + // to exist, the discriminant must be non-negative. This means that + // C-2kR <= B^2/4A is a necessary condition for k, i.e. there is a + // lower bound on values of k: 2kR >= C - B^2/4A. + APInt Low2kR = C - SqrB.udiv(2*TwoA); // udiv because all values > 0. + + // If there exists k meeting the condition above, and such that + // C-2kR > 0, there will be two positive real number solutions of + // q(x) = 2kR. Out of all such values of k, pick the one that makes + // C-2kR closest to 0, (i.e. pick maximum k such that C-2kR > 0). + if (C.sgt(Low2kR) && (C-Low2kR).ugt(TwoR)) { + // There exists 2kR such that Low2kR <= C-2kR < C. Minimize C-2kR + // by subtracting the maximum 2kR for which C-2kR >= Low2kR. + C = (C-Low2kR).urem(TwoR) + Low2kR; + // Pick the smaller solution. + PickLow = true; + } else { + // If C-2kR < 0 for all potential k's, pick the 2kR closest to the + // lower bound (i.e. make C-2kR closest to 0). + // Round Low2kR up to the nearest multiple of 2R and subtract it from C. + C -= Low2kR; + APInt U = Low2kR.urem(TwoR); + if (!U.isNullValue()) + C -= (TwoR - U); + // Pick the greater solution. + PickLow = false; + } + } - // The A coefficient is N/2 - APInt A = N.sdiv(Two); + LLVM_DEBUG(dbgs() << __func__ << ": updated coefficients " << A << "x^2 + " + << B << "x + " << C << " bw:" << BitWidth+1 << '\n'); - // The B coefficient is M-N/2 - APInt B = M; - B -= A; // A is the same as N/2. + APInt D = SqrB - 4*A*C; + assert(D.isNonNegative() && "Negative discriminant"); + APInt SQ = D.sqrt(); - // The C coefficient is L. - const APInt& C = L; + APInt Q = SQ * SQ; + // The calculated SQ may actually be greater than the exact (non-integer) + // value. If that's the case, decremement SQ to get a value that is lower. + if (Q.sgt(D)) + SQ -= 1; - // Compute the B^2-4ac term. - APInt SqrtTerm = B; - SqrtTerm *= B; - SqrtTerm -= 4 * (A * C); + APInt X; + APInt Rem; - if (SqrtTerm.isNegative()) { - // The loop is provably infinite. - return None; + if (PickLow) + APInt::sdivrem(-B - SQ, TwoA, X, Rem); + else + APInt::sdivrem(-B + SQ, TwoA, X, Rem); + + // The updated coefficients should be such that the (exact) solution is + // positive. Since APInt division rounds towards 0, the calculated one + // can be 0, but cannot be negative. + assert(X.isNonNegative() && "Solution should be non-negative"); + + if (Q == D) { + // If the division was not exact, add 1. + if (!Rem.isNullValue()) { + X += 1; + LLVM_DEBUG(dbgs() << __func__ << ": solution (sign): " << X << '\n'); + } else { + LLVM_DEBUG(dbgs() << __func__ << ": solution (root): " << X << '\n'); + } + return TruncToOrig(X); } - // Compute sqrt(B^2-4ac). This is guaranteed to be the nearest - // integer value or else APInt::sqrt() will assert. - APInt SqrtVal = SqrtTerm.sqrt(); - - // Compute the two solutions for the quadratic formula. - // The divisions must be performed as signed divisions. - APInt NegB = -std::move(B); - APInt TwoA = std::move(A); - TwoA <<= 1; - if (TwoA.isNullValue()) + // The exact value of the square root should be between SQ and 2*SQ. + // Calculate the "overestimated" root that corresponds to 2*SQ: + APInt Y = PickLow ? (-B - 2*SQ).sdiv(TwoA) + : (-B + 2*SQ).sdiv(TwoA); + // If AX^2 + BX + C < 0 and AY^2 + BY + C > 0, then use bisection + // to find the best approximation of the root. + APInt VX = (A*X + B)*X + C; + APInt VY = (A*Y + B)*Y + C; + if (!VX.isNegative() || VY.isNegative()) return None; - LLVMContext &Context = SE.getContext(); - - ConstantInt *Solution1 = - ConstantInt::get(Context, (NegB + SqrtVal).sdiv(TwoA)); - ConstantInt *Solution2 = - ConstantInt::get(Context, (NegB - SqrtVal).sdiv(TwoA)); + while (X != Y) { + APInt T = (X + Y).sdiv(2); + APInt V = (A*T + B)*T + C; + if (V.isNegative()) + X = T+1; + else + Y = T; + } - return std::make_pair(cast(SE.getConstant(Solution1)), - cast(SE.getConstant(Solution2))); + LLVM_DEBUG(dbgs() << __func__ << ": solution (sign): " << X << '\n'); + return TruncToOrig(X); } ScalarEvolution::ExitLimit @@ -8349,23 +8492,15 @@ // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of // the quadratic equation to solve it. if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) { - if (auto Roots = SolveQuadraticEquation(AddRec, *this)) { - const SCEVConstant *R1 = Roots->first; - const SCEVConstant *R2 = Roots->second; - // Pick the smallest positive root value. - if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( - CmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { - if (!CB->getZExtValue()) - std::swap(R1, R2); // R1 is the minimum root now. - - // We can only use this value if the chrec ends up with an exact zero - // value at this index. When solving for "X*X != 5", for example, we - // should not accept a root of 2. - const SCEV *Val = AddRec->evaluateAtIteration(R1, *this); - if (Val->isZero()) - // We found a quadratic root! - return ExitLimit(R1, R1, false, Predicates); - } + if (auto Root = SolveQuadraticEquation(AddRec, *this)) { + // We can only use this value if the chrec ends up with an exact zero + // value at this index. When solving for "X*X != 5", for example, we + // should not accept a root of 2. + const SCEVConstant *R = Root.getValue(); + const SCEV *Val = AddRec->evaluateAtIteration(R, *this); + if (Val->isZero()) + // We found a quadratic root! + return ExitLimit(R, R, false, Predicates); } return getCouldNotCompute(); } @@ -10473,7 +10608,9 @@ ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && "Linear scev computation is off in a bad way!"); return SE.getConstant(ExitValue); - } else if (isQuadratic()) { + } + + if (isQuadratic()) { // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of the // quadratic equation to solve it. To do this, we must frame our problem in // terms of figuring out when zero is crossed, instead of when @@ -10483,41 +10620,25 @@ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap); // Next, solve the constructed addrec - if (auto Roots = + if (auto Root = SolveQuadraticEquation(cast(NewAddRec), SE)) { - const SCEVConstant *R1 = Roots->first; - const SCEVConstant *R2 = Roots->second; - // Pick the smallest positive root value. - if (ConstantInt *CB = dyn_cast(ConstantExpr::getICmp( - ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) { - if (!CB->getZExtValue()) - std::swap(R1, R2); // R1 is the minimum root now. - + const SCEVConstant *R = Root.getValue(); // Make sure the root is not off by one. The returned iteration should // not be in the range, but the previous one should be. When solving // for "X*X < 5", for example, we should not return a root of 2. - ConstantInt *R1Val = - EvaluateConstantChrecAtConstant(this, R1->getValue(), SE); - if (Range.contains(R1Val->getValue())) { - // The next iteration must be out of the range... - ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getAPInt() + 1); - - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (!Range.contains(R1Val->getValue())) - return SE.getConstant(NextVal); + ConstantInt *RootVal = + EvaluateConstantChrecAtConstant(this, R->getValue(), SE); + if (Range.contains(RootVal->getValue())) return SE.getCouldNotCompute(); // Something strange happened - } - // If R1 was not in the range, then it is a good return value. Make - // sure that R1-1 WAS in the range though, just in case. + // If RootVal was not in the range, then it is a good return value. + // Make sure that RootVal-1 WAS in the range though, just in case. ConstantInt *NextVal = - ConstantInt::get(SE.getContext(), R1->getAPInt() - 1); - R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE); - if (Range.contains(R1Val->getValue())) - return R1; + ConstantInt::get(SE.getContext(), R->getAPInt() - 1); + RootVal = EvaluateConstantChrecAtConstant(this, NextVal, SE); + if (Range.contains(RootVal->getValue())) + return R; return SE.getCouldNotCompute(); // Something strange happened - } } } Index: test/Analysis/ScalarEvolution/solve-quadratic-i1.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/solve-quadratic-i1.ll @@ -0,0 +1,98 @@ +; RUN: opt -analyze -scalar-evolution < %s | FileCheck %s + +target triple = "x86_64-unknown-linux-gnu" + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f0': +; CHECK-NEXT: Classifying expressions for: @f0 +; CHECK-NEXT: %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ] +; CHECK-NEXT: --> {2,+,1}<%b1> U: [2,4) S: [2,4) Exits: 3 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ] +; CHECK-NEXT: --> {1,+,2,+,1}<%b1> U: full-set S: full-set Exits: 3 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v2 = add nsw i16 %v0, 1 +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,5) S: [3,5) Exits: 4 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v3 = add nsw i16 %v1, %v0 +; CHECK-NEXT: --> {3,+,3,+,1}<%b1> U: full-set S: full-set Exits: 6 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v4 = and i16 %v3, 1 +; CHECK-NEXT: --> (zext i1 {true,+,true,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: Determining loop execution counts for: @f0 +; CHECK-NEXT: Loop %b1: backedge-taken count is 1 +; CHECK-NEXT: Loop %b1: max backedge-taken count is 1 +; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 1 +; CHECK-NEXT: Predicates: +; CHECK: Loop %b1: Trip multiple is 2 +define void @f0() { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i16 [ 2, %b0 ], [ %v2, %b1 ] + %v1 = phi i16 [ 1, %b0 ], [ %v3, %b1 ] + %v2 = add nsw i16 %v0, 1 + %v3 = add nsw i16 %v1, %v0 + %v4 = and i16 %v3, 1 + %v5 = icmp ne i16 %v4, 0 + br i1 %v5, label %b1, label %b2 + +b2: ; preds = %b1 + ret void +} + +@g0 = common dso_local global i16 0, align 2 +@g1 = common dso_local global i32 0, align 4 +@g2 = common dso_local global i32* null, align 8 + +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'f1': +; CHECK-NEXT: Classifying expressions for: @f1 +; CHECK-NEXT: %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ] +; CHECK-NEXT: --> {0,+,3,+,1}<%b1> U: full-set S: full-set Exits: 7 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ] +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v2 = trunc i32 %v1 to i16 +; CHECK-NEXT: --> {3,+,1}<%b1> U: [3,6) S: [3,6) Exits: 5 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v3 = add i16 %v0, %v2 +; CHECK-NEXT: --> {3,+,4,+,1}<%b1> U: full-set S: full-set Exits: 12 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v4 = and i16 %v3, 1 +; CHECK-NEXT: --> (zext i1 {true,+,false,+,true}<%b1> to i16) U: [0,2) S: [0,2) Exits: 0 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v6 = add nuw nsw i32 %v1, 1 +; CHECK-NEXT: --> {4,+,1}<%b1> U: [4,7) S: [4,7) Exits: 6 LoopDispositions: { %b1: Computable } +; CHECK-NEXT: %v7 = phi i32 [ %v1, %b1 ] +; CHECK-NEXT: --> %v7 U: [3,6) S: [3,6) +; CHECK-NEXT: %v8 = phi i16 [ %v3, %b1 ] +; CHECK-NEXT: --> %v8 U: full-set S: full-set +; CHECK-NEXT: Determining loop execution counts for: @f1 +; CHECK-NEXT: Loop %b3: Unpredictable backedge-taken count. +; CHECK-NEXT: Loop %b3: Unpredictable max backedge-taken count. +; CHECK-NEXT: Loop %b3: Unpredictable predicated backedge-taken count. +; CHECK-NEXT: Loop %b1: backedge-taken count is 2 +; CHECK-NEXT: Loop %b1: max backedge-taken count is 2 +; CHECK-NEXT: Loop %b1: Predicated backedge-taken count is 2 +; CHECK-NEXT: Predicates: +; CHECK: Loop %b1: Trip multiple is 3 +define void @f1() #0 { +b0: + store i16 0, i16* @g0, align 2 + store i32* @g1, i32** @g2, align 8 + br label %b1 + +b1: ; preds = %b1, %b0 + %v0 = phi i16 [ 0, %b0 ], [ %v3, %b1 ] + %v1 = phi i32 [ 3, %b0 ], [ %v6, %b1 ] + %v2 = trunc i32 %v1 to i16 + %v3 = add i16 %v0, %v2 + %v4 = and i16 %v3, 1 + %v5 = icmp eq i16 %v4, 0 + %v6 = add nuw nsw i32 %v1, 1 + br i1 %v5, label %b2, label %b1 + +b2: ; preds = %b1 + %v7 = phi i32 [ %v1, %b1 ] + %v8 = phi i16 [ %v3, %b1 ] + store i32 %v7, i32* @g1, align 4 + store i16 %v8, i16* @g0, align 2 + br label %b3 + +b3: ; preds = %b3, %b2 + br label %b3 +} + +attributes #0 = { nounwind uwtable "target-cpu"="x86-64" } Index: test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/solve-quadratic-overflow.ll @@ -0,0 +1,27 @@ +; RUN: opt -analyze -scalar-evolution -S < %s | FileCheck %s + +; CHECK: Loop %b1: backedge-taken count is 255 + +@g0 = global i32 0, align 4 +@g1 = global i16 0, align 2 + +define signext i32 @f0() { +b0: + br label %b1 + +b1: ; preds = %b1, %b0 + %v1 = phi i16 [ 0, %b0 ], [ %v2, %b1 ] + %v2 = add i16 %v1, -1 + %v3 = mul i16 %v2, %v2 + %v4 = icmp eq i16 %v3, 0 + br i1 %v4, label %b2, label %b1 + +b2: ; preds = %b1 + %v5 = phi i16 [ %v2, %b1 ] + %v6 = phi i16 [ %v3, %b1 ] + %v7 = sext i16 %v5 to i32 + store i32 %v7, i32* @g0, align 4 + store i16 %v6, i16* @g1, align 2 + ret i32 0 +} + Index: test/Analysis/ScalarEvolution/solve-quadratic.ll =================================================================== --- /dev/null +++ test/Analysis/ScalarEvolution/solve-quadratic.ll @@ -0,0 +1,150 @@ +; RUN: opt -analyze -scalar-evolution -S -debug-only=scalar-evolution < %s 2>&1 | FileCheck %s + +; Use the following template to get n chrec {L,+,M,+,N}. +; +; define signext i32 @func() { +; entry: +; br label %loop +; +; loop: +; %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] +; %inc = phi i32 [ X, %entry ], [ %inc1, %loop ] +; %acc = phi i32 [ Y, %entry ], [ %acc1, %loop ] +; %ivr1 = add i32 %ivr, %inc +; %inc1 = add i32 %inc, Z ; M = inc1 = inc + N = X + N +; %acc1 = add i32 %acc, %inc ; L = acc1 = X + Y +; %and = and i32 %acc1, 2^W-1 ; iW +; %cond = icmp eq i32 %and, 0 +; br i1 %cond, label %exit, label %loop +; +; exit: +; %rv = phi i32 [ %acc1, %loop ] +; ret i32 %rv +; } +; +; From +; X + Y = L +; X + Z = M +; Z = N +; get +; X = M - N +; Y = N - M + L +; Z = N + +; The connection between chrec coefficients {L,+,M,+,N} and the quadratic +; coefficients is that the quadratic equation is N x^2 + (2M-N) x + 2L = 0, +; where the equation was multiplied by 2 (if needed) to make 2L an even +; number. + +; Quadratic equation: 2x^2 + 2x + 4 in i4, solution (sign change): 4 +; {14,+,14,+,14} -> X=0, Y=14, Z=14 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test01' +; CHECK: SolveQuadraticEquation: solving 2x^2 + 2x + 4, bw:4 for AddRec: {-2,+,-2,+,-2}<%loop> +; CHECK: SolveQuadraticEquation: updated coefficients 2x^2 + 2x + -28 bw:5 +; CHECK: SolveQuadraticEquation: solution (sign): 4 +; CHECK: Loop %loop: Unpredictable backedge-taken count +define signext i32 @test01() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ 0, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 14, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 14 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, 15 + %cond = icmp eq i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation: 1x^2 + -73x + -146 in i32, solution (sign change): 75 +; {-72,+,-36,+,1} -> X=-37, Y=-35, Z=1 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test02': +; CHECK: SolveQuadraticEquation: solving 1x^2 + -73x + -146, bw:32 for AddRec: {-73,+,-36,+,1}<%loop> +; CHECK: SolveQuadraticEquation: updated coefficients 1x^2 + -73x + -146 bw:33 +; CHECK: SolveQuadraticEquation: solution (sign): 75 +; CHECK: Loop %loop: backedge-taken count is 75 +define signext i32 @test02() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -37, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ -35, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 1 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, -1 + %cond = icmp sgt i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation: 2x^2 - 4x + 34 in i4, solution (exact): 1. +; {17,+,-1,+,2} -> X=-3, Y=20, Z=2 +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test03': +; CHECK: SolveQuadraticEquation: solving 2x^2 + -4x + 2, bw:4 for AddRec: {1,+,-1,+,2}<%loop> +; CHECK: SolveQuadraticEquation: updated coefficients 2x^2 + -4x + 2 bw:5 +; CHECK: SolveQuadraticEquation: solution (root): 1 +; CHECK: Loop %loop: backedge-taken count is 1 +define signext i32 @test03() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -3, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 20, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 2 + %acc1 = add i32 %acc, %inc + %and = and i32 %acc1, 15 + %cond = icmp eq i32 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} + +; Quadratic equation 4x^2 + 2x + 2 in i16, solution (sign change): 181 +; {1,+,3,+,4} -> X=-1, Y=2, Z=4 (i16) +; +; CHECK-LABEL: Printing analysis 'Scalar Evolution Analysis' for function 'test04': +; CHECK: SolveQuadraticEquation: solving 4x^2 + 2x + 2, bw:16 for AddRec: {1,+,3,+,4}<%loop> +; CHECK: SolveQuadraticEquation: updated coefficients 4x^2 + 2x + -131070 bw:17 +; CHECK: SolveQuadraticEquation: solution (sign): 181 +; CHECK: Loop %loop: Unpredictable backedge-taken count. +define signext i32 @test04() { +entry: + br label %loop + +loop: + %ivr = phi i32 [ 0, %entry ], [ %ivr1, %loop ] + %inc = phi i32 [ -1, %entry ], [ %inc1, %loop ] + %acc = phi i32 [ 2, %entry ], [ %acc1, %loop ] + %ivr1 = add i32 %ivr, %inc + %inc1 = add i32 %inc, 4 + %acc1 = add i32 %acc, %inc + %and = trunc i32 %acc1 to i16 + %cond = icmp eq i16 %and, 0 + br i1 %cond, label %exit, label %loop + +exit: + %rv = phi i32 [ %acc1, %loop ] + ret i32 %rv +} +