Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -1152,16 +1152,23 @@
   const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
   const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+                         unsigned Depth = 0);
+  const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
+                         unsigned Depth) {
+    SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+    return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+  }
   const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
                          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
     SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
     return getAddExpr(Ops, Flags);
   }
   const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
-                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+                         SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap,
+                         unsigned Depth = 0) {
     SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
-    return getAddExpr(Ops, Flags);
+    return getAddExpr(Ops, Flags, Depth + 1);
   }
   const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
                          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -127,11 +127,21 @@
     cl::desc("Threshold for inlining multiplication operands into a SCEV"),
     cl::init(1000));
 
+static cl::opt<unsigned> AddOpsInlineThreshold(
+    "scev-addops-inline-threshold", cl::Hidden,
+    cl::desc("Threshold for inlining addition operands into a SCEV"),
+    cl::init(500));
+
 static cl::opt<unsigned>
     MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
                     cl::desc("Maximum depth of recursive compare complexity"),
                     cl::init(32));
 
+static cl::opt<unsigned>
+    MaxAddExprDepth("scalar-evolution-max-addexpr-depth", cl::Hidden,
+                    cl::desc("Maximum depth of recursive AddExpr"),
+                    cl::init(32));
+
 //===----------------------------------------------------------------------===//
 // SCEV class definitions
 //===----------------------------------------------------------------------===//
@@ -2090,7 +2100,8 @@
 
 /// Get a canonical add expression, or something simpler if possible.
 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
-                                        SCEV::NoWrapFlags Flags) {
+                                        SCEV::NoWrapFlags Flags,
+                                        unsigned Depth) {
   assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) &&
          "only nuw or nsw allowed");
   assert(!Ops.empty() && "Cannot get empty add!");
@@ -2129,306 +2140,313 @@
     if (Ops.size() == 1)
       return Ops[0];
   }
 
-  // Okay, check to see if the same value occurs in the operand list more than
-  // once.  If so, merge them together into an multiply expression.  Since we
-  // sorted the list, these values are required to be adjacent.
-  Type *Ty = Ops[0]->getType();
-  bool FoundMatch = false;
-  for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
-    if (Ops[i] == Ops[i+1]) {  //  X + Y + Y  -->  X + Y*2
-      // Scan ahead to count how many equal operands there are.
-      unsigned Count = 2;
-      while (i+Count != e && Ops[i+Count] == Ops[i])
-        ++Count;
-      // Merge the values into a multiply.
-      const SCEV *Scale = getConstant(Ty, Count);
-      const SCEV *Mul = getMulExpr(Scale, Ops[i]);
-      if (Ops.size() == Count)
-        return Mul;
-      Ops[i] = Mul;
-      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
-      --i; e -= Count - 1;
-      FoundMatch = true;
-    }
-  if (FoundMatch)
-    return getAddExpr(Ops, Flags);
-
-  // Check for truncates. If all the operands are truncated from the same
-  // type, see if factoring out the truncate would permit the result to be
-  // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
-  // if the contents of the resulting outer trunc fold to something simple.
-  for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
-    const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
-    Type *DstType = Trunc->getType();
-    Type *SrcType = Trunc->getOperand()->getType();
-    SmallVector<const SCEV *, 8> LargeOps;
-    bool Ok = true;
-    // Check all the operands to see if they can be represented in the
-    // source type of the truncate.
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
-      if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
-        if (T->getOperand()->getType() != SrcType) {
-          Ok = false;
-          break;
-        }
-        LargeOps.push_back(T->getOperand());
-      } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
-        LargeOps.push_back(getAnyExtendExpr(C, SrcType));
-      } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
-        SmallVector<const SCEV *, 8> LargeMulOps;
-        for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
-          if (const SCEVTruncateExpr *T =
-                dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
-            if (T->getOperand()->getType() != SrcType) {
-              Ok = false;
-              break;
-            }
-            LargeMulOps.push_back(T->getOperand());
-          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
-            LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
-          } else {
-            Ok = false;
-            break;
-          }
-        }
-        if (Ok)
-          LargeOps.push_back(getMulExpr(LargeMulOps));
-      } else {
-        Ok = false;
-        break;
-      }
-    }
-    if (Ok) {
-      // Evaluate the expression in the larger type.
-      const SCEV *Fold = getAddExpr(LargeOps, Flags);
-      // If it folds to something simple, use it.  Otherwise, don't.
-      if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
-        return getTruncateExpr(Fold, DstType);
-    }
-  }
-
-  // Skip past any other cast SCEVs.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
-    ++Idx;
-
-  // If there are add operands they would be next.
-  if (Idx < Ops.size()) {
-    bool DeletedAdd = false;
-    while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
-      // If we have an add, expand the add operands onto the end of the operands
-      // list.
-      Ops.erase(Ops.begin()+Idx);
-      Ops.append(Add->op_begin(), Add->op_end());
-      DeletedAdd = true;
-    }
-
-    // If we deleted at least one add, we added operands to the end of the list,
-    // and they are not necessarily sorted.  Recurse to resort and resimplify
-    // any operands we just acquired.
-    if (DeletedAdd)
-      return getAddExpr(Ops);
-  }
-
-  // Skip over the add expression until we get to a multiply.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
-    ++Idx;
-
-  // Check to see if there are any folding opportunities present with
-  // operands multiplied by constant values.
-  if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
-    uint64_t BitWidth = getTypeSizeInBits(Ty);
-    DenseMap<const SCEV *, APInt> M;
-    SmallVector<const SCEV *, 8> NewOps;
-    APInt AccumulatedConstant(BitWidth, 0);
-    if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
-                                     Ops.data(), Ops.size(),
-                                     APInt(BitWidth, 1), *this)) {
-      struct APIntCompare {
-        bool operator()(const APInt &LHS, const APInt &RHS) const {
-          return LHS.ult(RHS);
-        }
-      };
-
-      // Some interesting folding opportunity is present, so its worthwhile to
-      // re-generate the operands list. Group the operands by constant scale,
-      // to avoid multiplying by the same constant scale multiple times.
-      std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
-      for (const SCEV *NewOp : NewOps)
-        MulOpLists[M.find(NewOp)->second].push_back(NewOp);
-      // Re-generate the operands list.
-      Ops.clear();
-      if (AccumulatedConstant != 0)
-        Ops.push_back(getConstant(AccumulatedConstant));
-      for (auto &MulOp : MulOpLists)
-        if (MulOp.first != 0)
-          Ops.push_back(getMulExpr(getConstant(MulOp.first),
-                                   getAddExpr(MulOp.second)));
-      if (Ops.empty())
-        return getZero(Ty);
-      if (Ops.size() == 1)
-        return Ops[0];
-      return getAddExpr(Ops);
-    }
-  }
-
-  // If we are adding something to a multiply expression, make sure the
-  // something is not already an operand of the multiply.  If so, merge it into
-  // the multiply.
-  for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
-    const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
-    for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
-      const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
-      if (isa<SCEVConstant>(MulOpSCEV))
-        continue;
-      for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
-        if (MulOpSCEV == Ops[AddOp]) {
-          // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
-          const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
-          if (Mul->getNumOperands() != 2) {
-            // If the multiply has more than two operands, we must get the
-            // Y*Z term.
-            SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
-                                                Mul->op_begin()+MulOp);
-            MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
-            InnerMul = getMulExpr(MulOps);
-          }
-          const SCEV *One = getOne(Ty);
-          const SCEV *AddOne = getAddExpr(One, InnerMul);
-          const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
-          if (Ops.size() == 2) return OuterMul;
-          if (AddOp < Idx) {
-            Ops.erase(Ops.begin()+AddOp);
-            Ops.erase(Ops.begin()+Idx-1);
-          } else {
-            Ops.erase(Ops.begin()+Idx);
-            Ops.erase(Ops.begin()+AddOp-1);
-          }
-          Ops.push_back(OuterMul);
-          return getAddExpr(Ops);
-        }
-
-      // Check this multiply against other multiplies being added together.
-      for (unsigned OtherMulIdx = Idx+1;
-           OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
-           ++OtherMulIdx) {
-        const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
-        // If MulOp occurs in OtherMul, we can fold the two multiplies
-        // together.
-        for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
-             OMulOp != e; ++OMulOp)
-          if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
-            // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
-            const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
-            if (Mul->getNumOperands() != 2) {
-              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
-                                                  Mul->op_begin()+MulOp);
-              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
-              InnerMul1 = getMulExpr(MulOps);
-            }
-            const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
-            if (OtherMul->getNumOperands() != 2) {
-              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
-                                                  OtherMul->op_begin()+OMulOp);
-              MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end());
-              InnerMul2 = getMulExpr(MulOps);
-            }
-            const SCEV *InnerMulSum = getAddExpr(InnerMul1,InnerMul2);
-            const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
-            if (Ops.size() == 2) return OuterMul;
-            Ops.erase(Ops.begin()+Idx);
-            Ops.erase(Ops.begin()+OtherMulIdx-1);
-            Ops.push_back(OuterMul);
-            return getAddExpr(Ops);
-          }
-      }
-    }
-  }
-
-  // If there are any add recurrences in the operands list, see if any other
-  // added values are loop invariant.  If so, we can fold them into the
-  // recurrence.
-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
-    ++Idx;
-
-  // Scan over all recurrences, trying to fold loop invariants into them.
-  for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
-    // Scan all of the other operands to this add and add them to the vector if
-    // they are loop invariant w.r.t. the recurrence.
-    SmallVector<const SCEV *, 8> LIOps;
-    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
-    const Loop *AddRecLoop = AddRec->getLoop();
-    for (unsigned i = 0, e = Ops.size(); i != e; ++i)
-      if (isLoopInvariant(Ops[i], AddRecLoop)) {
-        LIOps.push_back(Ops[i]);
-        Ops.erase(Ops.begin()+i);
-        --i; --e;
-      }
-
-    // If we found some loop invariants, fold them into the recurrence.
-    if (!LIOps.empty()) {
-      //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
-      LIOps.push_back(AddRec->getStart());
-
-      SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
-                                             AddRec->op_end());
-      // This follows from the fact that the no-wrap flags on the outer add
-      // expression are applicable on the 0th iteration, when the add recurrence
-      // will be equal to its start value.
-      AddRecOps[0] = getAddExpr(LIOps, Flags);
-
-      // Build the new addrec. Propagate the NUW and NSW flags if both the
-      // outer add and the inner addrec are guaranteed to have no overflow.
-      // Always propagate NW.
-      Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
-      const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
-
-      // If all of the other operands were loop invariant, we are done.
-      if (Ops.size() == 1) return NewRec;
-
-      // Otherwise, add the folded AddRec by the non-invariant parts.
-      for (unsigned i = 0;; ++i)
-        if (Ops[i] == AddRec) {
-          Ops[i] = NewRec;
-          break;
-        }
-      return getAddExpr(Ops);
-    }
-
-    // Okay, if there weren't any loop invariants to be folded, check to see if
-    // there are multiple AddRec's with the same loop induction variable being
-    // added together.  If so, we can fold them.
-    for (unsigned OtherIdx = Idx+1;
-         OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-         ++OtherIdx)
-      if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
-        // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
-        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
-                                               AddRec->op_end());
-        for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
-             ++OtherIdx)
-          if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
-            if (OtherAddRec->getLoop() == AddRecLoop) {
-              for (unsigned i = 0, e = OtherAddRec->getNumOperands();
-                   i != e; ++i) {
-                if (i >= AddRecOps.size()) {
-                  AddRecOps.append(OtherAddRec->op_begin()+i,
-                                   OtherAddRec->op_end());
-                  break;
-                }
-                AddRecOps[i] = getAddExpr(AddRecOps[i],
-                                          OtherAddRec->getOperand(i));
-              }
-              Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
-            }
-        // Step size has changed, so we cannot guarantee no self-wraparound.
-        Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
-        return getAddExpr(Ops);
-      }
-
-    // Otherwise couldn't fold anything into this recurrence.  Move onto the
-    // next one.
-  }
-
+  if (Depth <= MaxAddExprDepth) {
+    // Okay, check to see if the same value occurs in the operand list more than
+    // once.  If so, merge them together into a multiply expression.  Since we
+    // sorted the list, these values are required to be adjacent.
+    Type *Ty = Ops[0]->getType();
+    bool FoundMatch = false;
+    for (unsigned i = 0, e = Ops.size(); i != e-1; ++i)
+      if (Ops[i] == Ops[i+1]) {  //  X + Y + Y  -->  X + Y*2
+        // Scan ahead to count how many equal operands there are.
+        unsigned Count = 2;
+        while (i+Count != e && Ops[i+Count] == Ops[i])
+          ++Count;
+        // Merge the values into a multiply.
+        const SCEV *Scale = getConstant(Ty, Count);
+        const SCEV *Mul = getMulExpr(Scale, Ops[i]);
+        if (Ops.size() == Count)
+          return Mul;
+        Ops[i] = Mul;
+        Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count);
+        --i; e -= Count - 1;
+        FoundMatch = true;
+      }
+    if (FoundMatch)
+      return getAddExpr(Ops, Flags);
+
+    // Check for truncates. If all the operands are truncated from the same
+    // type, see if factoring out the truncate would permit the result to be
+    // folded. eg., trunc(x) + m*trunc(n) --> trunc(x + trunc(m)*n)
+    // if the contents of the resulting outer trunc fold to something simple.
+    for (; Idx < Ops.size() && isa<SCEVTruncateExpr>(Ops[Idx]); ++Idx) {
+      const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(Ops[Idx]);
+      Type *DstType = Trunc->getType();
+      Type *SrcType = Trunc->getOperand()->getType();
+      SmallVector<const SCEV *, 8> LargeOps;
+      bool Ok = true;
+      // Check all the operands to see if they can be represented in the
+      // source type of the truncate.
+      for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+        if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) {
+          if (T->getOperand()->getType() != SrcType) {
+            Ok = false;
+            break;
+          }
+          LargeOps.push_back(T->getOperand());
+        } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) {
+          LargeOps.push_back(getAnyExtendExpr(C, SrcType));
+        } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) {
+          SmallVector<const SCEV *, 8> LargeMulOps;
+          for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) {
+            if (const SCEVTruncateExpr *T =
+                    dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) {
+              if (T->getOperand()->getType() != SrcType) {
+                Ok = false;
+                break;
+              }
+              LargeMulOps.push_back(T->getOperand());
+            } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {
+              LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));
+            } else {
+              Ok = false;
+              break;
+            }
+          }
+          if (Ok)
+            LargeOps.push_back(getMulExpr(LargeMulOps));
+        } else {
+          Ok = false;
+          break;
+        }
+      }
+      if (Ok) {
+        // Evaluate the expression in the larger type.
+        const SCEV *Fold = getAddExpr(LargeOps, Flags, Depth + 1);
+        // If it folds to something simple, use it.  Otherwise, don't.
+        if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold))
+          return getTruncateExpr(Fold, DstType);
+      }
+    }
+
+    // Skip past any other cast SCEVs.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr)
+      ++Idx;
+
+    // If there are add operands they would be next.
+    if (Idx < Ops.size()) {
+      bool DeletedAdd = false;
+      while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) {
+        if (Ops.size() > AddOpsInlineThreshold ||
+            Add->getNumOperands() > AddOpsInlineThreshold)
+          break;
+        // If we have an add, expand the add operands onto the end of the operands
+        // list.
+        Ops.erase(Ops.begin()+Idx);
+        Ops.append(Add->op_begin(), Add->op_end());
+        DeletedAdd = true;
+      }
+
+      // If we deleted at least one add, we added operands to the end of the list,
+      // and they are not necessarily sorted.  Recurse to resort and resimplify
+      // any operands we just acquired.
+      if (DeletedAdd)
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+    }
+
+    // Skip over the add expression until we get to a multiply.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr)
+      ++Idx;
+
+    // Check to see if there are any folding opportunities present with
+    // operands multiplied by constant values.
+    if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) {
+      uint64_t BitWidth = getTypeSizeInBits(Ty);
+      DenseMap<const SCEV *, APInt> M;
+      SmallVector<const SCEV *, 8> NewOps;
+      APInt AccumulatedConstant(BitWidth, 0);
+      if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,
+                                       Ops.data(), Ops.size(),
+                                       APInt(BitWidth, 1), *this)) {
+        struct APIntCompare {
+          bool operator()(const APInt &LHS, const APInt &RHS) const {
+            return LHS.ult(RHS);
+          }
+        };
+
+        // Some interesting folding opportunity is present, so it's worthwhile to
+        // re-generate the operands list. Group the operands by constant scale,
+        // to avoid multiplying by the same constant scale multiple times.
+        std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists;
+        for (const SCEV *NewOp : NewOps)
+          MulOpLists[M.find(NewOp)->second].push_back(NewOp);
+        // Re-generate the operands list.
+        Ops.clear();
+        if (AccumulatedConstant != 0)
+          Ops.push_back(getConstant(AccumulatedConstant));
+        for (auto &MulOp : MulOpLists)
+          if (MulOp.first != 0)
+            Ops.push_back(getMulExpr(
+                getConstant(MulOp.first),
+                getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1)));
+        if (Ops.empty())
+          return getZero(Ty);
+        if (Ops.size() == 1)
+          return Ops[0];
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+      }
+    }
+
+    // If we are adding something to a multiply expression, make sure the
+    // something is not already an operand of the multiply.  If so, merge it into
+    // the multiply.
+    for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) {
+      const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]);
+      for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) {
+        const SCEV *MulOpSCEV = Mul->getOperand(MulOp);
+        if (isa<SCEVConstant>(MulOpSCEV))
+          continue;
+        for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp)
+          if (MulOpSCEV == Ops[AddOp]) {
+            // Fold W + X + (X * Y * Z)  -->  W + (X * ((Y*Z)+1))
+            const SCEV *InnerMul = Mul->getOperand(MulOp == 0);
+            if (Mul->getNumOperands() != 2) {
+              // If the multiply has more than two operands, we must get the
+              // Y*Z term.
+              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                  Mul->op_begin()+MulOp);
+              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+              InnerMul = getMulExpr(MulOps);
+            }
+            const SCEV *One = getOne(Ty);
+            const SCEV *AddOne = getAddExpr(One, InnerMul, Depth + 1);
+            const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);
+            if (Ops.size() == 2) return OuterMul;
+            if (AddOp < Idx) {
+              Ops.erase(Ops.begin()+AddOp);
+              Ops.erase(Ops.begin()+Idx-1);
+            } else {
+              Ops.erase(Ops.begin()+Idx);
+              Ops.erase(Ops.begin()+AddOp-1);
+            }
+            Ops.push_back(OuterMul);
+            return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+          }
+
+        // Check this multiply against other multiplies being added together.
+        for (unsigned OtherMulIdx = Idx+1;
+             OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]);
+             ++OtherMulIdx) {
+          const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]);
+          // If MulOp occurs in OtherMul, we can fold the two multiplies
+          // together.
+          for (unsigned OMulOp = 0, e = OtherMul->getNumOperands();
+               OMulOp != e; ++OMulOp)
+            if (OtherMul->getOperand(OMulOp) == MulOpSCEV) {
+              // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
+              const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0);
+              if (Mul->getNumOperands() != 2) {
+                SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                    Mul->op_begin()+MulOp);
+                MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());
+                InnerMul1 = getMulExpr(MulOps);
+              }
+              const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0);
+              if (OtherMul->getNumOperands() != 2) {
+                SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+                                                    OtherMul->op_begin()+OMulOp);
+                MulOps.append(OtherMul->op_begin()+OMulOp+1,
+                              OtherMul->op_end());
+                InnerMul2 = getMulExpr(MulOps);
+              }
+              const SCEV *InnerMulSum =
+                  getAddExpr(InnerMul1, InnerMul2, Depth + 1);
+              const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum);
+              if (Ops.size() == 2) return OuterMul;
+              Ops.erase(Ops.begin()+Idx);
+              Ops.erase(Ops.begin()+OtherMulIdx-1);
+              Ops.push_back(OuterMul);
+              return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+            }
+        }
+      }
+    }
+
+    // If there are any add recurrences in the operands list, see if any other
+    // added values are loop invariant.  If so, we can fold them into the
+    // recurrence.
+    while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr)
+      ++Idx;
+
+    // Scan over all recurrences, trying to fold loop invariants into them.
+    for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) {
+      // Scan all of the other operands to this add and add them to the vector if
+      // they are loop invariant w.r.t. the recurrence.
+      SmallVector<const SCEV *, 8> LIOps;
+      const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
+      const Loop *AddRecLoop = AddRec->getLoop();
+      for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+        if (isLoopInvariant(Ops[i], AddRecLoop)) {
+          LIOps.push_back(Ops[i]);
+          Ops.erase(Ops.begin()+i);
+          --i; --e;
+        }
+
+      // If we found some loop invariants, fold them into the recurrence.
+      if (!LIOps.empty()) {
+        //  NLI + LI + {Start,+,Step}  -->  NLI + {LI+Start,+,Step}
+        LIOps.push_back(AddRec->getStart());
+
+        SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+                                               AddRec->op_end());
+        // This follows from the fact that the no-wrap flags on the outer add
+        // expression are applicable on the 0th iteration, when the add recurrence
+        // will be equal to its start value.
+        AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1);
+
+        // Build the new addrec. Propagate the NUW and NSW flags if both the
+        // outer add and the inner addrec are guaranteed to have no overflow.
+        // Always propagate NW.
+        Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW));
+        const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags);
+
+        // If all of the other operands were loop invariant, we are done.
+        if (Ops.size() == 1) return NewRec;
+
+        // Otherwise, add the folded AddRec by the non-invariant parts.
+        for (unsigned i = 0;; ++i)
+          if (Ops[i] == AddRec) {
+            Ops[i] = NewRec;
+            break;
+          }
+        return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+      }
+
+      // Okay, if there weren't any loop invariants to be folded, check to see if
+      // there are multiple AddRec's with the same loop induction variable being
+      // added together.  If so, we can fold them.
+      for (unsigned OtherIdx = Idx+1;
+           OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+           ++OtherIdx)
+        if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) {
+          // Other + {A,+,B}<L> + {C,+,D}<L>  -->  Other + {A+C,+,B+D}<L>
+          SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(),
+                                                 AddRec->op_end());
+          for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);
+               ++OtherIdx)
+            if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))
+              if (OtherAddRec->getLoop() == AddRecLoop) {
+                for (unsigned i = 0, e = OtherAddRec->getNumOperands();
+                     i != e; ++i) {
+                  if (i >= AddRecOps.size()) {
+                    AddRecOps.append(OtherAddRec->op_begin()+i,
+                                     OtherAddRec->op_end());
+                    break;
+                  }
+                  AddRecOps[i] = getAddExpr(
+                      AddRecOps[i], OtherAddRec->getOperand(i), Depth + 1);
+                }
+                Ops.erase(Ops.begin() + OtherIdx); --OtherIdx;
+              }
+          // Step size has changed, so we cannot guarantee no self-wraparound.
+          Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap);
+          return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1);
+        }
+
+      // Otherwise couldn't fold anything into this recurrence.  Move onto the
+      // next one.
+    }
+  }
   // Okay, it looks like we really DO need an add expr.  Check to see if we
   // already have one, otherwise create a new one.
   FoldingSetNodeID ID;
Index: unittests/Analysis/ScalarEvolutionTest.cpp
===================================================================
--- unittests/Analysis/ScalarEvolutionTest.cpp
+++ unittests/Analysis/ScalarEvolutionTest.cpp
@@ -532,5 +532,27 @@
   EXPECT_NE(nullptr, SE.getSCEV(Acc[0]));
 }
 
+TEST_F(ScalarEvolutionsTest, SCEVAddExpr) {
+  FunctionType *FTy =
+      FunctionType::get(Type::getVoidTy(Context), std::vector<Type *>(), false);
+  Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
+  BasicBlock *EntryBB = BasicBlock::Create(Context, "entry", F);
+
+  auto *Ty = Type::getInt32Ty(Context);
+  Instruction *Mul1 = BinaryOperator::CreateMul(UndefValue::get(Ty), UndefValue::get(Ty), "", EntryBB);
+  Instruction *Add1 = BinaryOperator::CreateAdd(Mul1, UndefValue::get(Ty), "", EntryBB);
+  Mul1 = BinaryOperator::CreateMul(Add1, UndefValue::get(Ty), "", EntryBB);
+  Instruction *Add2 = BinaryOperator::CreateAdd(Mul1, Add1, "", EntryBB);
+  for (int i = 0; i < 1000; i++) {
+    Mul1 = BinaryOperator::CreateMul(Add2, Add1, "", EntryBB);
+    Add1 = Add2;
+    Add2 = BinaryOperator::CreateAdd(Mul1, Add1, "", EntryBB);
+  }
+
+  ReturnInst::Create(Context, nullptr, EntryBB);
+  ScalarEvolution SE = buildSE(*F);
+  EXPECT_NE(nullptr, SE.getSCEV(Mul1));
+}
+
 } // end anonymous namespace
 } // end namespace llvm
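Note for reviewers: the cutoff fails soft. Once Depth exceeds MaxAddExprDepth, getAddExpr skips the whole simplification block above and falls through to the FoldingSet lookup, so it still returns a correct (merely less canonical) add expression rather than recursing further. Both knobs are ordinary cl::opt flags, so a compile-time blowup can be narrowed down from the command line; a hypothetical invocation (exact pass syntax varies by opt version):

  opt -analyze -scalar-evolution \
      -scalar-evolution-max-addexpr-depth=16 \
      -scev-addops-inline-threshold=100 input.ll

For readers outside SCEV, here is a minimal standalone sketch of the guard structure this patch introduces, with placeholder types and names, not the LLVM API:

  #include <vector>

  constexpr unsigned MaxDepth = 32;         // cf. scalar-evolution-max-addexpr-depth
  constexpr unsigned InlineThreshold = 500; // cf. scev-addops-inline-threshold

  struct Expr { std::vector<const Expr *> Ops; };

  const Expr *getAdd(std::vector<const Expr *> &Ops, unsigned Depth = 0) {
    if (Depth <= MaxDepth) {
      // Every simplification that can recurse lives in this block and is
      // invoked with Depth + 1, so nested adds bottom out here instead of
      // recursing without bound.  Operand inlining is additionally capped:
      // skip flattening a nested add once Ops.size() > InlineThreshold.
    }
    // Past the cutoff: still produce a valid, merely unsimplified node.
    return new Expr{Ops}; // leaked here for brevity; LLVM uniques via FoldingSet
  }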