Index: include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfo.h
+++ include/llvm/Analysis/TargetTransformInfo.h
@@ -293,6 +293,11 @@
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale) const;
 
+  /// \brief Return true when the AddRec can be folded into another
+  /// instruction. This is possible when the target supports pre/post-increment
+  /// addressing and the stride of the AddRec is an acceptable constant.
+  bool isAddRecFoldable(int64_t stride) const;
+
   /// \brief Return true if the target works with masked instruction
   /// AVX2 allows masks for consecutive load and store for i32 and i64 elements.
   /// AVX-512 architecture will also allow masks for non-consecutive memory
@@ -524,6 +529,7 @@
   virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) = 0;
   virtual bool isLegalAddImmediate(int64_t Imm) = 0;
   virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
+  virtual bool isAddRecFoldable(int64_t stride) = 0;
   virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                      int64_t BaseOffset, bool HasBaseReg,
                                      int64_t Scale) = 0;
@@ -631,6 +637,9 @@
   bool isLegalICmpImmediate(int64_t Imm) override {
     return Impl.isLegalICmpImmediate(Imm);
   }
+  bool isAddRecFoldable(int64_t stride) override {
+    return Impl.isAddRecFoldable(stride);
+  }
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale) override {
     return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
Index: include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- include/llvm/Analysis/TargetTransformInfoImpl.h
+++ include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -204,6 +204,8 @@
 
   bool isLegalICmpImmediate(int64_t Imm) { return false; }
 
+  bool isAddRecFoldable(int64_t stride) { return false; }
+
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale) {
     // Guess that reg+reg addressing is allowed. This heuristic is taken from
Index: include/llvm/CodeGen/BasicTTIImpl.h
===================================================================
--- include/llvm/CodeGen/BasicTTIImpl.h
+++ include/llvm/CodeGen/BasicTTIImpl.h
@@ -122,6 +122,10 @@
     return getTLI()->isLegalICmpImmediate(imm);
   }
 
+  bool isAddRecFoldable(int64_t stride) {
+    return getTLI()->isAddRecFoldable(stride);
+  }
+
   bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                              bool HasBaseReg, int64_t Scale) {
     TargetLoweringBase::AddrMode AM;
Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -1461,6 +1461,11 @@
     return true;
   }
 
+  /// \brief Return true when the AddRec can be folded into another
+  /// instruction. This is possible when the target supports pre/post-increment
+  /// addressing and the stride of the AddRec is an acceptable constant.
+  virtual bool isAddRecFoldable(int64_t stride) const { return false; }
+
   /// Return true if the specified immediate is legal add immediate, that is the
   /// target has add instructions which can add a register with the immediate
   /// without having to materialize the immediate into a register.
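Note: the AArch64 and ARM overrides later in this patch return true for every
stride. A target whose pre/post-increment forms only accept a narrow write-back
immediate would presumably gate on the stride instead; a minimal sketch,
assuming a hypothetical MyTargetLowering and an illustrative signed 9-bit
offset range (neither is part of this patch):

  // Hypothetical override: report the AddRec as foldable only when the stride
  // fits the assumed pre/post-increment immediate field of the target.
  bool MyTargetLowering::isAddRecFoldable(int64_t stride) const {
    // Signed 9-bit write-back offset range, chosen for illustration only.
    return stride != 0 && stride >= -256 && stride <= 255;
  }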
Index: lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- lib/Analysis/TargetTransformInfo.cpp
+++ lib/Analysis/TargetTransformInfo.cpp
@@ -93,6 +93,10 @@
   return TTIImpl->isLegalICmpImmediate(Imm);
 }
 
+bool TargetTransformInfo::isAddRecFoldable(int64_t stride) const {
+  return TTIImpl->isAddRecFoldable(stride);
+}
+
 bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                 int64_t BaseOffset,
                                                 bool HasBaseReg,
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -307,6 +307,7 @@
 
   bool isLegalAddImmediate(int64_t) const override;
   bool isLegalICmpImmediate(int64_t) const override;
+  bool isAddRecFoldable(int64_t stride) const override;
 
   EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
                           bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6682,6 +6682,12 @@
   return isLegalAddImmediate(Immed);
 }
 
+/// AArch64 supports pre/post-increment addressing, so the stride of an AddRec
+/// may be folded into a load/store instruction.
+bool AArch64TargetLowering::isAddRecFoldable(int64_t stride) const {
+  return true;
+}
+
 /// isLegalAddressingMode - Return true if the addressing mode represented
 /// by AM is legal for this target, for a load/store of the specified type.
 bool AArch64TargetLowering::isLegalAddressingMode(const AddrMode &AM,
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -294,6 +294,10 @@
     /// the immediate into a register.
     bool isLegalICmpImmediate(int64_t Imm) const override;
 
    /// isAddRecFoldable - Return true if the constant stride of an AddRec can
    /// be folded into a prior load/store via pre/post-increment addressing.
+    bool isAddRecFoldable(int64_t stride) const override;
+
    /// isLegalAddImmediate - Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -10188,6 +10188,10 @@
   return Imm >= 0 && Imm <= 255;
 }
 
+/// ARM supports pre/post-increment addressing, so the stride of an AddRec may
+/// be folded into a load/store instruction.
+bool ARMTargetLowering::isAddRecFoldable(int64_t stride) const { return true; }
+
 /// isLegalAddImmediate - Return true if the specified immediate is a legal add
 /// *or sub* immediate, that is the target has add or sub instructions which can
 /// add a register with the immediate without having to materialize the
Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -851,7 +851,9 @@
                                  const LSRUse &LU, const Formula &F);
 
 // Get the cost of the scaling factor used in F for LU.
 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
-                                     const LSRUse &LU, const Formula &F);
+                                     const LSRUse &LU, const Formula &F,
+                                     unsigned &InstNumCost);
+static bool isAddressType(const LSRUse &LU);
 
 namespace {
@@ -859,18 +861,23 @@
 class Cost {
   /// TODO: Some of these could be merged. Also, a lexical ordering
   /// isn't always optimal.
+
+  /// Number of registers used so far by the candidate formula or solution.
   unsigned NumRegs;
-  unsigned AddRecCost;
-  unsigned NumIVMuls;
-  unsigned NumBaseAdds;
-  unsigned ImmCost;
+
+  /// Cost in number of instructions.
+  unsigned InstNumCost;
+  /// Cost of registers that are likely to spill.
+  unsigned SpillRegsCost;
+  /// Instruction complexity cost.
+  unsigned InstCmplxCost;
+  /// Cost of instructions inserted in the loop preheader.
   unsigned SetupCost;
-  unsigned ScaleCost;
 
 public:
   Cost()
-    : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0),
-      SetupCost(0), ScaleCost(0) {}
+    : NumRegs(0), InstNumCost(0), SpillRegsCost(0), InstCmplxCost(0),
+      SetupCost(0) {}
 
   bool operator<(const Cost &Other) const;
 
@@ -879,16 +886,15 @@
 #ifndef NDEBUG
   // Once any of the metrics loses, they must all remain losers.
   bool isValid() {
-    return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds
-             | ImmCost | SetupCost | ScaleCost) != ~0u)
-      || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds
-           & ImmCost & SetupCost & ScaleCost) == ~0u);
+    return (
+        ((InstNumCost | SpillRegsCost | InstCmplxCost | SetupCost) != ~0u) ||
+        ((InstNumCost & SpillRegsCost & InstCmplxCost & SetupCost) == ~0u));
   }
 #endif
 
   bool isLoser() {
     assert(isValid() && "invalid cost");
-    return NumRegs == ~0u;
+    return InstNumCost == ~0u;
   }
 
   void RateFormula(const TargetTransformInfo &TTI,
@@ -905,24 +911,25 @@
   void dump() const;
 
 private:
-  void RateRegister(const SCEV *Reg,
-                    SmallPtrSetImpl<const SCEV *> &Regs,
-                    const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
-  void RatePrimaryRegister(const SCEV *Reg,
-                           SmallPtrSetImpl<const SCEV *> &Regs,
-                           const Loop *L,
+  void RateRegister(const TargetTransformInfo &TTI, const SCEV *Reg,
+                    SmallPtrSetImpl<const SCEV *> &Regs, const Loop *L,
+                    ScalarEvolution &SE, DominatorTree &DT, const LSRUse &LU,
+                    bool &IsIndexedLoadStore);
+  void RatePrimaryRegister(const TargetTransformInfo &TTI, const SCEV *Reg,
+                           SmallPtrSetImpl<const SCEV *> &Regs, const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const LSRUse &LU, bool &IsIndexedLoadStore);
+  void UpdateSpillRegsCost(const TargetTransformInfo &TTI);
 };
 
 }
 
 /// RateRegister - Tally up interesting quantities from the given register.
-void Cost::RateRegister(const SCEV *Reg,
-                        SmallPtrSetImpl<const SCEV *> &Regs,
-                        const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT) {
+void Cost::RateRegister(const TargetTransformInfo &TTI, const SCEV *Reg,
+                        SmallPtrSetImpl<const SCEV *> &Regs, const Loop *L,
+                        ScalarEvolution &SE, DominatorTree &DT,
+                        const LSRUse &LU, bool &IsIndexedLoadStore) {
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, don't second-guess its addrec phi
     // nodes. LSR isn't currently smart enough to reason about more than one
@@ -937,13 +944,50 @@
       Lose();
       return;
     }
-    AddRecCost += 1; /// TODO: This should be a function of the stride.
+
+    // Usually we need a separate add/sub instruction for every AddRec. But
+    // if the target supports pre/post-increment addressing, the following
+    // pattern (using AArch64 as an example):
+    //   adrp  x10, a
+    //   add   x10, x10, :lo12:a
+    // .LBB0_2:
+    //   add   x12, x10, x9, lsl #2   // add for the AddRec used by the store
+    //   add   x9, x9, #1
+    //   str   w11, [x12, #48]
+    //   cmp   x9, x8
+    //   b.lt  .LBB0_2
+    // can be optimized to this form using post-increment addressing:
+    //   adrp  x10, a
+    //   add   x10, x10, :lo12:a
+    //   add   x10, x10, #48
+    // .LBB0_2:
+    //   str   w11, [x10], #4
+    //   add   x9, x9, #1
+    //   cmp   x9, x8
+    //   b.lt  .LBB0_2
+    // and no separate add instruction is required for the AddRec. If a
+    // pre/post-increment addressing mode is used, we record it in
+    // IsIndexedLoadStore because it usually means the load/store cannot fold
+    // other registers or offsets into the same instruction.
+    if (AR->isAffine() && isa<SCEVConstant>(AR->getOperand(1))) {
+      int64_t stride = dyn_cast_or_null<SCEVConstant>(AR->getOperand(1))
+                           ->getValue()
+                           ->getValue()
+                           .getSExtValue();
+      if (stride != 0 && TTI.isAddRecFoldable(stride) && isAddressType(LU))
+        IsIndexedLoadStore = true;
+      else
+        InstNumCost += 1;
+    } else {
+      InstNumCost += 1;
+    }
 
     // Add the step value register, if it needs one.
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(TTI, AR->getOperand(1), Regs, L, SE, DT, LU,
+                     IsIndexedLoadStore);
         if (isLoser())
           return;
       }
@@ -960,24 +1004,24 @@
          isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart()))))
     ++SetupCost;
 
-  NumIVMuls += isa<SCEVMulExpr>(Reg) &&
-               SE.hasComputableLoopEvolution(Reg, L);
+  InstNumCost += isa<SCEVMulExpr>(Reg) && SE.hasComputableLoopEvolution(Reg, L);
 }
 
 /// RatePrimaryRegister - Record this register in the set. If we haven't seen it
 /// before, rate it. Optional LoserRegs provides a way to declare any formula
 /// that refers to one of those regs an instant loser.
-void Cost::RatePrimaryRegister(const SCEV *Reg,
+void Cost::RatePrimaryRegister(const TargetTransformInfo &TTI, const SCEV *Reg,
                                SmallPtrSetImpl<const SCEV *> &Regs,
-                               const Loop *L,
-                               ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               const Loop *L, ScalarEvolution &SE,
+                               DominatorTree &DT,
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const LSRUse &LU, bool &IsIndexedLoadStore) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(TTI, Reg, Regs, L, SE, DT, LU, IsIndexedLoadStore);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -992,6 +1036,8 @@
                        ScalarEvolution &SE, DominatorTree &DT,
                        const LSRUse &LU,
                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+  // Show whether a pre/post-increment addressing mode is used for the formula.
+  bool IsIndexedLoadStore = false;
   assert(F.isCanonical() && "Cost is accurate only for canonical formula");
   // Tally up the registers.
   if (const SCEV *ScaledReg = F.ScaledReg) {
@@ -999,7 +1045,8 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(TTI, ScaledReg, Regs, L, SE, DT, LoserRegs, LU,
+                        IsIndexedLoadStore);
     if (isLoser())
       return;
   }
@@ -1010,71 +1057,76 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(TTI, BaseReg, Regs, L, SE, DT, LoserRegs, LU,
+                        IsIndexedLoadStore);
     if (isLoser())
       return;
   }
 
   // Determine how many (unfolded) adds we'll need inside the loop.
   size_t NumBaseParts = F.getNumRegs();
+  unsigned NumBaseAdds = 0;
   if (NumBaseParts > 1)
     // Do not count the base and a possible second register if the target
     // allows to fold 2 registers.
     NumBaseAdds +=
-        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F)));
+        NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(TTI, LU, F) &&
+                             !IsIndexedLoadStore));
   NumBaseAdds += (F.UnfoldedOffset != 0);
+  InstNumCost += NumBaseAdds;
 
   // Accumulate non-free scaling amounts.
-  ScaleCost += getScalingFactorCost(TTI, LU, F);
+  InstCmplxCost += getScalingFactorCost(TTI, LU, F, InstNumCost);
 
   // Tally up the non-zero immediates.
+  unsigned ImmCost = 0;
   for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(),
        E = Offsets.end(); I != E; ++I) {
     int64_t Offset = (uint64_t)*I + F.BaseOffset;
-    if (F.BaseGV)
-      ImmCost += 64; // Handle symbolic values conservatively.
-                     // TODO: This should probably be the pointer size.
-    else if (Offset != 0)
-      ImmCost += APInt(64, Offset, true).getMinSignedBits();
+    if (Offset != 0 && IsIndexedLoadStore)
+      ImmCost += 1;
   }
+  InstNumCost += ImmCost;
+
+  UpdateSpillRegsCost(TTI);
   assert(isValid() && "invalid cost");
 }
 
 /// Lose - Set this cost to a losing value.
 void Cost::Lose() {
   NumRegs = ~0u;
-  AddRecCost = ~0u;
-  NumIVMuls = ~0u;
-  NumBaseAdds = ~0u;
-  ImmCost = ~0u;
+  InstNumCost = ~0u;
+  SpillRegsCost = ~0u;
+  InstCmplxCost = ~0u;
   SetupCost = ~0u;
-  ScaleCost = ~0u;
+}
+
+/// UpdateSpillRegsCost - Update the SpillRegsCost according to register
+/// pressure.
+void Cost::UpdateSpillRegsCost(const TargetTransformInfo &TTI) {
+  SpillRegsCost = (NumRegs > TTI.getNumberOfRegisters(false))
+                      ? NumRegs - TTI.getNumberOfRegisters(false)
+                      : 0;
 }
 
 /// operator< - Choose the lower cost.
 bool Cost::operator<(const Cost &Other) const {
-  return std::tie(NumRegs, AddRecCost, NumIVMuls, NumBaseAdds, ScaleCost,
-                  ImmCost, SetupCost) <
-         std::tie(Other.NumRegs, Other.AddRecCost, Other.NumIVMuls,
-                  Other.NumBaseAdds, Other.ScaleCost, Other.ImmCost,
-                  Other.SetupCost);
+  unsigned InstCost = InstNumCost + SpillRegsCost;
+  unsigned OtherInstCost = Other.InstNumCost + Other.SpillRegsCost;
+  return std::tie(InstCost, InstCmplxCost, SetupCost) <
+         std::tie(OtherInstCost, Other.InstCmplxCost, Other.SetupCost);
 }
 
 void Cost::print(raw_ostream &OS) const {
-  OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s");
-  if (AddRecCost != 0)
-    OS << ", with addrec cost " << AddRecCost;
-  if (NumIVMuls != 0)
-    OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? "" : "s");
-  if (NumBaseAdds != 0)
-    OS << ", plus " << NumBaseAdds << " base add"
-       << (NumBaseAdds == 1 ? "" : "s");
-  if (ScaleCost != 0)
-    OS << ", plus " << ScaleCost << " scale cost";
-  if (ImmCost != 0)
-    OS << ", plus " << ImmCost << " imm cost";
+  OS << NumRegs << " Reg" << (NumRegs == 1 ? "" : "s");
+  if (InstNumCost != 0)
+    OS << ", plus " << InstNumCost << " InstNumCost";
+  if (SpillRegsCost != 0)
+    OS << ", plus " << SpillRegsCost << " SpillRegsCost";
+  if (InstCmplxCost != 0)
+    OS << ", plus " << InstCmplxCost << " InstCmplxCost";
   if (SetupCost != 0)
-    OS << ", plus " << SetupCost << " setup cost";
+    OS << ", plus " << SetupCost << " SetupCost";
 }
 
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
@@ -1497,16 +1549,23 @@
                               F.Scale);
 }
 
+static bool isAddressType(const LSRUse &LU) {
+  return LU.Kind == LSRUse::Address;
+}
+
 static unsigned getScalingFactorCost(const TargetTransformInfo &TTI,
-                                     const LSRUse &LU, const Formula &F) {
+                                     const LSRUse &LU, const Formula &F,
+                                     unsigned &InstNumCost) {
   if (!F.Scale)
     return 0;
 
   // If the use is not completely folded in that instruction, we will have to
   // pay an extra cost only for scale != 1.
   if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
-                            LU.AccessTy, F))
-    return F.Scale != 1;
+                            LU.AccessTy, F)) {
+    InstNumCost += 1;
+    return 0;
+  }
 
   switch (LU.Kind) {
   case LSRUse::Address: {
@@ -1527,8 +1586,8 @@
   case LSRUse::ICmpZero:
   case LSRUse::Basic:
   case LSRUse::Special:
-    // The use is completely folded, i.e., everything is folded into the
-    // instruction.
+    if (!(F.getNumRegs() == 1 && F.BaseOffset == 0 && F.UnfoldedOffset == 0))
+      InstNumCost += 1;
     return 0;
   }
 
@@ -1773,6 +1832,8 @@
                     DenseSet<const SCEV *> &VisitedRegs) const;
   void Solve(SmallVectorImpl<const Formula *> &Solution) const;
+  void DumpSolution(SmallVectorImpl<const Formula *> &Solution,
+                    Cost &SolutionCost, StringRef HeadStr) const;
 
   BasicBlock::iterator
     HoistInsertPosition(BasicBlock::iterator IP,
                         const SmallVectorImpl<Instruction *> &Inputs) const;
@@ -4296,42 +4357,12 @@
 
   const LSRUse &LU = Uses[Workspace.size()];
 
-  // If this use references any register that's already a part of the
-  // in-progress solution, consider it a requirement that a formula must
-  // reference that register in order to be considered. This prunes out
-  // unprofitable searching.
-  SmallSetVector<const SCEV *, 4> ReqRegs;
-  for (const SCEV *S : CurRegs)
-    if (LU.Regs.count(S))
-      ReqRegs.insert(S);
-
   SmallPtrSet<const SCEV *, 16> NewRegs;
   Cost NewCost;
   for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(),
        E = LU.Formulae.end(); I != E; ++I) {
     const Formula &F = *I;
 
-    // Ignore formulae which may not be ideal in terms of register reuse of
-    // ReqRegs. The formula should use all required registers before
-    // introducing new ones.
-    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
-    for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(),
-         JE = ReqRegs.end(); J != JE; ++J) {
-      const SCEV *Reg = *J;
-      if ((F.ScaledReg && F.ScaledReg == Reg) ||
-          std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
-          F.BaseRegs.end()) {
-        --NumReqRegsToFind;
-        if (NumReqRegsToFind == 0)
-          break;
-      }
-    }
-    if (NumReqRegsToFind != 0) {
-      // If none of the formulae satisfied the required registers, then we could
-      // clear ReqRegs and try again. Currently, we simply give up in this case.
-      continue;
-    }
-
     // Evaluate the cost of the current formula. If it's already worse than
     // the current best, prune the search at that point.
     NewCost = CurCost;
@@ -4346,14 +4377,9 @@
         if (F.getNumRegs() == 1 && Workspace.size() == 1)
           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
       } else {
-        DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
-              dbgs() << ".\n Regs:";
-              for (const SCEV *S : NewRegs)
-                dbgs() << ' ' << *S;
-              dbgs() << '\n');
-
         SolutionCost = NewCost;
         Solution = Workspace;
+        DumpSolution(Solution, SolutionCost, "New best at ");
       }
       Workspace.pop_back();
     }
@@ -4380,9 +4406,15 @@
   }
 
   // Ok, we've now made all our decisions.
-  DEBUG(dbgs() << "\n"
-                  "The chosen solution requires "; SolutionCost.print(dbgs());
-        dbgs() << ":\n";
+  DumpSolution(Solution, SolutionCost, "The chosen solution requires ");
+
+  assert(Solution.size() == Uses.size() && "Malformed solution!");
+}
+
+/// DumpSolution - Dump the given solution.
+void LSRInstance::DumpSolution(SmallVectorImpl<const Formula *> &Solution,
+                               Cost &SolutionCost, StringRef HeadStr) const {
+  DEBUG(dbgs() << "\n" << HeadStr; SolutionCost.print(dbgs()); dbgs() << ":\n";
         for (size_t i = 0, e = Uses.size(); i != e; ++i) {
           dbgs() << "  ";
           Uses[i].print(dbgs());
@@ -4391,8 +4423,6 @@
           Solution[i]->print(dbgs());
          dbgs() << '\n';
         });
-
-  assert(Solution.size() == Uses.size() && "Malformed solution!");
 }
 
 /// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up
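For reference, the kind of input the cost-model change targets is a simple
strided store loop. The sketch below is illustrative only (the array, names,
and offset are not taken from the patch or its tests); it matches the assembly
example in Cost::RateRegister: once the stride is reported as foldable, the
store can become a single post-incremented str on AArch64 instead of needing a
separate add for the AddRec each iteration.

  // Illustrative input only, not part of the patch.
  int a[1024];

  void fill(int n, int c) {
    // The store address is the affine AddRec {a+48,+,4}; when the target
    // reports the 4-byte stride as foldable, LSR can keep it in a
    // post-incremented store rather than materializing a separate add.
    for (int i = 0; i < n; ++i)
      a[i + 12] = c;
  }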