Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -1152,16 +1152,23 @@
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+                         unsigned Depth = 0);
+  const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
+                         unsigned Depth) {
+    SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+    return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+  }
   const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
                          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
     SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
     return getAddExpr(Ops, Flags);
   }
   const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
-                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+                         unsigned Depth = 0) {
     SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
-    return getAddExpr(Ops, Flags);
+    return getAddExpr(Ops, Flags, Depth + 1);
   }
   const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -127,11 +127,21 @@
     cl::desc("Threshold for inlining multiplication operands into a SCEV"),
     cl::init(1000));
 
+static cl::opt<unsigned> AddOpsInlineThreshold(
+    "scev-addops-inline-threshold", cl::Hidden,
+    cl::desc("Threshold for inlining addition operands into a SCEV"),
+    cl::init(500));
+
 static cl::opt<unsigned>
     MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
                     cl::desc("Maximum depth of recursive compare complexity"),
                     cl::init(32));
 
+static cl::opt<unsigned>
+    MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden,
+                    cl::desc("Maximum depth of recursive AddExpr"),
+                    cl::init(32));
+
 //===----------------------------------------------------------------------===//
 // SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -2090,7 +2100,8 @@
 
 /// Get a canonical add expression, or something simpler if possible.
 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                                        SCEV::NoWrapFlags Flags) {
+                                        SCEV::NoWrapFlags Flags,
+                                        unsigned Depth) {
   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
          "only nuw or nsw allowed");
   assert(!Ops.empty() && "Cannot get empty add!");
@@ -2129,306 +2140,313 @@
     if (Ops.size() == 1)
       return Ops[0];
   }
 
-  // Okay, check to see if the same value occurs in the operand list more than
-  // once.  If so, merge them together into an multiply expression.  Since we
-  // sorted the list, these values are required to be adjacent.
-  Type *Ty = Ops[0]->getType();
-  bool FoundMatch = false;
-  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
-    if (Ops[i] == Ops[i+1]) {  //  X + Y + Y  -->  X + Y*2
-      // Scan ahead to count how many equal operands there are.
-      unsigned Count = 2;
-      while (i+Count != e && Ops[i+Count] == Ops[i])
-        ++Count;
-      // Merge the values into a multiply.
-      const SCEV *Scale = getConstant(Ty, Count);
-      const SCEV *Mul = getMulExpr(Scale, Ops[i]);
-      if (Ops.size() == Count)
-        return Mul;
-      Ops[i] = Mul;
-      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
-      --i; e -= Count - 1;
-      FoundMatch = true;
-    }
-  if (FoundMatch)
-    return getAddExpr(Ops, Flags);
-
-  // Check for truncates. If all the operands are truncated from the same
-  // type, see if factoring out the truncate would permit the result to be
-  // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
-  // if the contents of the resulting outer trunc fold to something simple.
-  for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
-    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
-    Type *DstType = Trunc->getType();
-    Type *SrcType = Trunc->getOperand()->getType();
-    SmallVector<const SCEV *, 8> LargeOps;
-    bool Ok = true;
-    // Check all the operands to see if they can be represented in the
-    // source type of the truncate.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
-        if (T->getOperand()->getType() != SrcType) {
-          Ok = false;
-          break;
-        }
-        LargeOps.push_back(T->getOperand());
-      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
-        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
-      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
-        SmallVector<const SCEV *, 8> LargeMulOps;
-        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
-          if (const SCEVTruncateExpr *T =
-                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
-            if (T->getOperand()->getType() != SrcType) {
-              Ok = false;
-              break;
-            }
-            LargeMulOps.push_back(T->getOperand());
-          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
-            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
-          } else {
-            Ok = false;
-            break;
-          }
-        }
-        if (Ok)
-          LargeOps.push_back(getMulExpr(LargeMulOps));
-      } else {
-        Ok = false;
-        break;
-      }
-    }
-    if (Ok) {
-      // Evaluate the expression in the larger type.
-      const SCEV *Fold = getAddExpr(LargeOps, Flags);
-      // If it folds to something simple, use it.  Otherwise, don't.
-      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
-        return getTruncateExpr(Fold, DstType);
-    }
-  }
-
-  // Skip past any other cast SCEVs.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
-    ++Idx;
-
-  // If there are add operands they would be next.
-  if (Idx < Ops.size()) {
-    bool DeletedAdd = false;
-    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
-      // If we have an add, expand the add operands onto the end of the operands
-      // list.
-      Ops.erase(Ops.begin()+Idx);
-      Ops.append(Add->op_begin(), Add->op_end());
-      DeletedAdd = true;
-    }
-
-    // If we deleted at least one add, we added operands to the end of the list,
-    // and they are not necessarily sorted.  Recurse to resort and resimplify
-    // any operands we just acquired.
-    if (DeletedAdd)
-      return getAddExpr(Ops);
-  }
-
-  // Skip over the add expression until we get to a multiply.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
-    ++Idx;
-
-  // Check to see if there are any folding opportunities present with
-  // operands multiplied by constant values.
-  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
-    uint64_t BitWidth = getTypeSizeInBits(Ty);
-    DenseMap<const SCEV *, APInt> M;
-    SmallVector<const SCEV *, 8> NewOps;
-    APInt AccumulatedConstant(BitWidth, 0);
-    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
-                                     Ops.data(), Ops.size(),
-                                     APInt(BitWidth, 1), *this)) {
-      struct APIntCompare {
-        bool operator()(const APInt &LHS, const APInt &RHS) const {
-          return LHS.ult(RHS);
-        }
-      };
-
-      // Some interesting folding opportunity is present, so its worthwhile to
-      // re-generate the operands list. Group the operands by constant scale,
-      // to avoid multiplying by the same constant scale multiple times.
-      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
-      for (const SCEV *NewOp : NewOps)
-        MulOpLists[M.find(NewOp)->second].push_back(NewOp);
-      // Re-generate the operands list.
-      Ops.clear();
-      if (AccumulatedConstant != 0)
-        Ops.push_back(getConstant(AccumulatedConstant));
-      for (auto &MulOp : MulOpLists)
-        if (MulOp.first != 0)
-          Ops.push_back(getMulExpr(getConstant(MulOp.first),
-                                   getAddExpr(MulOp.second)));
-      if (Ops.empty())
-        return getZero(Ty);
-      if (Ops.size() == 1)
-        return Ops[0];
-      return getAddExpr(Ops);
-    }
-  }
-
-  // If we are adding something to a multiply expression, make sure the
-  // something is not already an operand of the multiply.  If so, merge it into
-  // the multiply.
-  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
-    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
-    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
-      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
-      if (isa<SCEVConstant>(MulOpSCEV))
-        continue;
-      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
-        if (MulOpSCEV == Ops[AddOp]) {
-          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
-          const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
-          if (Mul->getNumOperands() != 2) {
-            // If the multiply has more than two operands, we must get the
-            // Y*Z term.
-            SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
-                                                Mul->op_begin()+MulOp);
-            MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
-            InnerMul = getMulExpr(MulOps);
-          }
-          const SCEV *One = getOne(Ty);
-          const SCEV *AddOne = getAddExpr(One, InnerMul);
-          const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
-          if (Ops.size() == 2) return OuterMul;
-          if (AddOp < Idx) {
-            Ops.erase(Ops.begin()+AddOp);
-            Ops.erase(Ops.begin()+Idx-1);
-          } else {
-            Ops.erase(Ops.begin()+Idx);
-            Ops.erase(Ops.begin()+AddOp-1);
-          }
-          Ops.push_back(OuterMul);
-          return getAddExpr(Ops);
-        }
-
-      // Check this multiply against other multiplies being added together.
-      for (unsigned OtherMulIdx = Idx+1;
-           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
-           ++OtherMulIdx) {
-        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
-        // If MulOp occurs in OtherMul, we can fold the two multiplies
-        // together.
-        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
-             OMulOp != e; ++OMulOp)
-          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
-            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
-            const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
-            if (Mul->getNumOperands() != 2) {
-              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
-                                                  Mul->op_begin()+MulOp);
-              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
-              InnerMul1 = getMulExpr(MulOps);
-            }
-            const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
-            if (OtherMul->getNumOperands() != 2) {
-              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
-                                                  OtherMul->op_begin()+OMulOp);
-              MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
-              InnerMul2 = getMulExpr(MulOps);
-            }
-            const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
-            const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
-            if (Ops.size() == 2) return OuterMul;
-            Ops.erase(Ops.begin()+Idx);
-            Ops.erase(Ops.begin()+OtherMulIdx-1);
-            Ops.push_back(OuterMul);
-            return getAddExpr(Ops);
-          }
-      }
-    }
-  }
-
-  // If there are any add recurrences in the operands list, see if any other
-  // added values are loop invariant.  If so, we can fold them into the
-  // recurrence.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
-    ++Idx;
-
-  // Scan over all recurrences, trying to fold loop invariants into them.
-  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
-    // Scan all of the other operands to this add and add them to the vector if
-    // they are loop invariant w.r.t. the recurrence.
-    SmallVector<const SCEV *, 8> LIOps;
-    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
-    const Loop *AddRecLoop = AddRec->getLoop();
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (isLoopInvariant(Ops[i], AddRecLoop)) {
-        LIOps.push_back(Ops[i]);
-        Ops.erase(Ops.begin()+i);
-        --i; --e;
-      }
-
-    // If we found some loop invariants, fold them into the recurrence.
-    if (!LIOps.empty()) {
-      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
-      LIOps.push_back(AddRec->getStart());
-
-      SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
-                                             AddRec->op_end());
-      // This follows from the fact that the no-wrap flags on the outer add
-      // expression are applicable on the 0th iteration, when the add recurrence
-      // will be equal to its start value.
-      AddRecOps[0] = getAddExpr(LIOps, Flags);
-
-      // Build the new addrec. Propagate the NUW and NSW flags if both the
-      // outer add and the inner addrec are guaranteed to have no overflow.
-      // Always propagate NW.
-      Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
-      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
-
-      // If all of the other operands were loop invariant, we are done.
-      if (Ops.size() == 1) return NewRec;
-
-      // Otherwise, add the folded AddRec by the non-invariant parts.
-      for (unsigned i = 0;; ++i)
-        if (Ops[i] == AddRec) {
-          Ops[i] = NewRec;
-          break;
-        }
-      return getAddExpr(Ops);
-    }
-
-    // Okay, if there weren't any loop invariants to be folded, check to see if
-    // there are multiple AddRec's with the same loop induction variable being
-    // added together.  If so, we can fold them.
-    for (unsigned OtherIdx = Idx+1;
-         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-         ++OtherIdx)
-      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
-        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
-        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
-                                               AddRec->op_end());
-        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-             ++OtherIdx)
-          if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
-            if (OtherAddRec->getLoop() == AddRecLoop) {
-              for (unsigned i = 0, e = OtherAddRec->getNumOperands();
-                   i != e; ++i) {
-                if (i >= AddRecOps.size()) {
-                  AddRecOps.append(OtherAddRec->op_begin()+i,
-                                   OtherAddRec->op_end());
-                  break;
-                }
-                AddRecOps[i] = getAddExpr(AddRecOps[i],
-                                          OtherAddRec->getOperand(i));
-              }
-              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
-            }
-        // Step size has changed, so we cannot guarantee no self-wraparound.
-        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
-        return getAddExpr(Ops);
-      }
-
-    // Otherwise couldn't fold anything into this recurrence.  Move onto the
-    // next one.
-  }
-
+  if (Depth <= MaxAddExprDepth) {
+    // Okay, check to see if the same value occurs in the operand list more than
+    // once.  If so, merge them together into a multiply expression.  Since we
+    // sorted the list, these values are required to be adjacent.
+    Type *Ty = Ops[0]->getType();
+    bool FoundMatch = false;
+    for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+      if (Ops[i] == Ops[i+1]) {  //  X + Y + Y  -->  X + Y*2
+        // Scan ahead to count how many equal operands there are.
+        unsigned Count = 2;
+        while (i+Count != e && Ops[i+Count] == Ops[i])
+          ++Count;
+        // Merge the values into a multiply.
+        const SCEV *Scale = getConstant(Ty, Count);
+        const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+        if (Ops.size() == Count)
+          return Mul;
+        Ops[i] = Mul;
+        Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+        --i; e -= Count - 1;
+        FoundMatch = true;
+      }
+    if (FoundMatch)
+      return getAddExpr(Ops, Flags);
+
+    // Check for truncates. If all the operands are truncated from the same
+    // type, see if factoring out the truncate would permit the result to be
+    // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+    // if the contents of the resulting outer trunc fold to something simple.
+    for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+      const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+      Type *DstType = Trunc->getType();
+      Type *SrcType = Trunc->getOperand()->getType();
+      SmallVector<const SCEV *, 8> LargeOps;
+      bool Ok = true;
+      // Check all the operands to see if they can be represented in the
+      // source type of the truncate.
+      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+        if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+          if (T->getOperand()->getType() != SrcType) {
+            Ok = false;
+            break;
+          }
+          LargeOps.push_back(T->getOperand());
+        } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+          LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+        } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+          SmallVector<const SCEV *, 8> LargeMulOps;
+          for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+            if (const SCEVTruncateExpr *T =
+                    dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+              if (T->getOperand()->getType() != SrcType) {
+                Ok = false;
+                break;
+              }
+              LargeMulOps.push_back(T->getOperand());
+            } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
+              LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+            } else {
+              Ok = false;
+              break;
+            }
+          }
+          if (Ok)
+            LargeOps.push_back(getMulExpr(LargeMulOps));
+        } else {
+          Ok = false;
+          break;
+        }
+      }
+      if (Ok) {
+        // Evaluate the expression in the larger type.
+        const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
+        // If it folds to something simple, use it.  Otherwise, don't.
+        if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+          return getTruncateExpr(Fold, DstType);
+      }
+    }
+
+    // Skip past any other cast SCEVs.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+      ++Idx;
+
+    // If there are add operands they would be next.
+    if (Idx < Ops.size()) {
+      bool DeletedAdd = false;
+      while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+        if (Ops.size() > AddOpsInlineThreshold ||
+            Add->getNumOperands() > AddOpsInlineThreshold)
+          break;
+        // If we have an add, expand the add operands onto the end of the operands
+        // list.
+        Ops.erase(Ops.begin()+Idx);
+        Ops.append(Add->op_begin(), Add->op_end());
+        DeletedAdd = true;
+      }
+
+      // If we deleted at least one add, we added operands to the end of the list,
+      // and they are not necessarily sorted.  Recurse to resort and resimplify
+      // any operands we just acquired.
+      if (DeletedAdd)
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+    }
+
+    // Skip over the add expression until we get to a multiply.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+      ++Idx;
+
+    // Check to see if there are any folding opportunities present with
+    // operands multiplied by constant values.
+    if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+      uint64_t BitWidth = getTypeSizeInBits(Ty);
+      DenseMap<const SCEV *, APInt> M;
+      SmallVector<const SCEV *, 8> NewOps;
+      APInt AccumulatedConstant(BitWidth, 0);
+      if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+                                       Ops.data(), Ops.size(),
+                                       APInt(BitWidth, 1), *this)) {
+        struct APIntCompare {
+          bool operator()(const APInt &LHS, const APInt &RHS) const {
+            return LHS.ult(RHS);
+          }
+        };
+
+        // Some interesting folding opportunity is present, so it's worthwhile to
+        // re-generate the operands list. Group the operands by constant scale,
+        // to avoid multiplying by the same constant scale multiple times.
+        std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+        for (const SCEV *NewOp : NewOps)
+          MulOpLists[M.find(NewOp)->second].push_back(NewOp);
+        // Re-generate the operands list.
+        Ops.clear();
+        if (AccumulatedConstant != 0)
+          Ops.push_back(getConstant(AccumulatedConstant));
+        for (auto &MulOp : MulOpLists)
+          if (MulOp.first != 0)
+            Ops.push_back(getMulExpr(
+                getConstant(MulOp.first),
+                getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1)));
+        if (Ops.empty())
+          return getZero(Ty);
+        if (Ops.size() == 1)
+          return Ops[0];
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+      }
+    }
+
+    // If we are adding something to a multiply expression, make sure the
+    // something is not already an operand of the multiply.  If so, merge it into
+    // the multiply.
+    for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+      const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+      for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+        const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+        if (isa<SCEVConstant>(MulOpSCEV))
+          continue;
+        for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+          if (MulOpSCEV == Ops[AddOp]) {
+            // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
+            const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+            if (Mul->getNumOperands() != 2) {
+              // If the multiply has more than two operands, we must get the
+              // Y*Z term.
+              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                  Mul->op_begin()+MulOp);
+              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+              InnerMul = getMulExpr(MulOps);
+            }
+            const SCEV *One = getOne(Ty);
+            const SCEV *AddOne = getAddExpr(One, InnerMul, Depth + 1);
+            const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+            if (Ops.size() == 2) return OuterMul;
+            if (AddOp < Idx) {
+              Ops.erase(Ops.begin()+AddOp);
+              Ops.erase(Ops.begin()+Idx-1);
+            } else {
+              Ops.erase(Ops.begin()+Idx);
+              Ops.erase(Ops.begin()+AddOp-1);
+            }
+            Ops.push_back(OuterMul);
+            return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+          }
+
+        // Check this multiply against other multiplies being added together.
+        for (unsigned OtherMulIdx = Idx+1;
+             OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+             ++OtherMulIdx) {
+          const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+          // If MulOp occurs in OtherMul, we can fold the two multiplies
+          // together.
+          for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+               OMulOp != e; ++OMulOp)
+            if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+              // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+              const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+              if (Mul->getNumOperands() != 2) {
+                SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                    Mul->op_begin()+MulOp);
+                MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+                InnerMul1 = getMulExpr(MulOps);
+              }
+              const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+              if (OtherMul->getNumOperands() != 2) {
+                SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+                                                    OtherMul->op_begin()+OMulOp);
+                MulOps.append(OtherMul->op_begin()+OMulOp+1,
+                              OtherMul->op_end());
+                InnerMul2 = getMulExpr(MulOps);
+              }
+              const SCEV *InnerMulSum =
+                  getAddExpr(InnerMul1, InnerMul2, Depth + 1);
+              const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+              if (Ops.size() == 2) return OuterMul;
+              Ops.erase(Ops.begin()+Idx);
+              Ops.erase(Ops.begin()+OtherMulIdx-1);
+              Ops.push_back(OuterMul);
+              return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+            }
+        }
+      }
+    }
+
+    // If there are any add recurrences in the operands list, see if any other
+    // added values are loop invariant.  If so, we can fold them into the
+    // recurrence.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+      ++Idx;
+
+    // Scan over all recurrences, trying to fold loop invariants into them.
+    for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+      // Scan all of the other operands to this add and add them to the vector if
+      // they are loop invariant w.r.t. the recurrence.
+      SmallVector<const SCEV *, 8> LIOps;
+      const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+      const Loop *AddRecLoop = AddRec->getLoop();
+      for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+        if (isLoopInvariant(Ops[i], AddRecLoop)) {
+          LIOps.push_back(Ops[i]);
+          Ops.erase(Ops.begin()+i);
+          --i; --e;
+        }
+
+      // If we found some loop invariants, fold them into the recurrence.
+      if (!LIOps.empty()) {
+        //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
+        LIOps.push_back(AddRec->getStart());
+
+        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+                                               AddRec->op_end());
+        // This follows from the fact that the no-wrap flags on the outer add
+        // expression are applicable on the 0th iteration, when the add recurrence
+        // will be equal to its start value.
+        AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
+
+        // Build the new addrec. Propagate the NUW and NSW flags if both the
+        // outer add and the inner addrec are guaranteed to have no overflow.
+        // Always propagate NW.
+        Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+        const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+        // If all of the other operands were loop invariant, we are done.
+        if (Ops.size() == 1) return NewRec;
+
+        // Otherwise, add the folded AddRec by the non-invariant parts.
+        for (unsigned i = 0;; ++i)
+          if (Ops[i] == AddRec) {
+            Ops[i] = NewRec;
+            break;
+          }
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+      }
+
+      // Okay, if there weren't any loop invariants to be folded, check to see if
+      // there are multiple AddRec's with the same loop induction variable being
+      // added together.  If so, we can fold them.
+      for (unsigned OtherIdx = Idx+1;
+           OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+           ++OtherIdx)
+        if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+          // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
+          SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+                                                 AddRec->op_end());
+          for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+               ++OtherIdx)
+            if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+              if (OtherAddRec->getLoop() == AddRecLoop) {
+                for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+                     i != e; ++i) {
+                  if (i >= AddRecOps.size()) {
+                    AddRecOps.append(OtherAddRec->op_begin()+i,
+                                     OtherAddRec->op_end());
+                    break;
+                  }
+                  AddRecOps[i] = getAddExpr(
+                      AddRecOps[i], OtherAddRec->getOperand(i), Depth + 1);
+                }
+                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+              }
+          // Step size has changed, so we cannot guarantee no self-wraparound.
+          Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+          return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+        }
+
+      // Otherwise couldn't fold anything into this recurrence.  Move onto the
+      // next one.
+    }
+  }
   // Okay, it looks like we really DO need an add expr.  Check to see if we
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
Index: unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- unittests/Analysis/ScalarEvolutionTest.cpp
+++ unittests/Analysis/ScalarEvolutionTest.cpp
@@ -532,5 +532,27 @@
   EXPECT_NE(nullptr, SE.getSCEV(Acc[0]));
 }
 
+TEST_F(ScalarEvolutionsTest, SCEVAddExpr) {
+  FunctionType *FTy =
+      FunctionType::get(Type::getVoidTy(Context), std::vector<Type *>(), false);
+  Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+  BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
+
+  auto *Ty = Type::getInt32Ty(Context);
+  Instruction *Mul1 = BinaryOperator::CreateMul(UndefValue::get(Ty), UndefValue::get(Ty), "", EntryBB);
+  Instruction *Add1 = BinaryOperator::CreateAdd(Mul1, UndefValue::get(Ty), "", EntryBB);
+  Mul1 = BinaryOperator::CreateMul(Add1, UndefValue::get(Ty), "", EntryBB);
+  Instruction *Add2 = BinaryOperator::CreateAdd(Mul1, Add1, "", EntryBB);
+  for (int i = 0; i < 1000; i++) {
+    Mul1 = BinaryOperator::CreateMul(Add2, Add1, "", EntryBB);
+    Add1 = Add2;
+    Add2 = BinaryOperator::CreateAdd(Mul1, Add1, "", EntryBB);
+  }
+
+  ReturnInst::Create(Context, nullptr, EntryBB);
+  ScalarEvolution SE = buildSE(*F);
+  EXPECT_NE(nullptr, SE.getSCEV(Mul1));
+}
+
 } // end anonymous namespace
 } // end namespace llvm
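Note for reviewers: the cutoff fails soft. Once Depth exceeds MaxAddExprDepth, getAddExpr skips the whole simplification block above and falls through to the FoldingSet lookup, so it still returns a correct (merely less canonical) add expression rather than recursing further. Both knobs are ordinary cl::opt flags, so a compile-time blowup can be narrowed down from the command line; a hypothetical invocation (exact pass syntax varies by opt version):

  opt -analyze -scalar-evolution \
      -scalar-evolution-max-addexpr-depth=16 \
      -scev-addops-inline-threshold=100 input.ll

For readers outside SCEV, here is a minimal standalone sketch of the guard structure this patch introduces, with placeholder types and names, not the LLVM API:

  #include <vector>

  constexpr unsigned MaxDepth = 32;         // cf. scalar-evolution-max-addexpr-depth
  constexpr unsigned InlineThreshold = 500; // cf. scev-addops-inline-threshold

  struct Expr { std::vector<const Expr *> Ops; };

  const Expr *getAdd(std::vector<const Expr *> &Ops, unsigned Depth = 0) {
    if (Depth <= MaxDepth) {
      // Every simplification that can recurse lives in this block and is
      // invoked with Depth + 1, so nested adds bottom out here instead of
      // recursing without bound.  Operand inlining is additionally capped:
      // skip flattening a nested add once Ops.size() > InlineThreshold.
    }
    // Past the cutoff: still produce a valid, merely unsimplified node.
    return new Expr{Ops}; // leaked here for brevity; LLVM uniques via FoldingSet
  }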