diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
--- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -15,8 +15,10 @@
 #ifndef LLVM_ANALYSIS_LOOPUNROLLANALYZER_H
 #define LLVM_ANALYSIS_LOOPUNROLLANALYZER_H
 
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/InstVisitor.h"
 
 // This class is used to get an estimate of the optimization effects that we
@@ -36,6 +38,55 @@
 // And finally:
 //   v = b[1]
 namespace llvm {
+
+namespace {
+/// A struct to densely store the state of an instruction after unrolling at
+/// each iteration.
+///
+/// This is designed to work like a tuple of <Instruction *, int> for the
+/// purposes of hashing and lookup, but to be able to associate two boolean
+/// states with each key.
+struct UnrolledInstState {
+  Instruction *I;
+  int Iteration : 30;
+  unsigned IsFree : 1;
+  unsigned IsCounted : 1;
+};
+
+/// Hashing and equality testing for a set of the instruction states.
+struct UnrolledInstStateKeyInfo {
+  using PtrInfo = DenseMapInfo<Instruction *>;
+  using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
+
+  static inline UnrolledInstState getEmptyKey() {
+    return {PtrInfo::getEmptyKey(), 0, 0, 0};
+  }
+
+  static inline UnrolledInstState getTombstoneKey() {
+    return {PtrInfo::getTombstoneKey(), 0, 0, 0};
+  }
+
+  static inline unsigned getHashValue(const UnrolledInstState &S) {
+    return PairInfo::getHashValue({S.I, S.Iteration});
+  }
+
+  static inline bool isEqual(const UnrolledInstState &LHS,
+                             const UnrolledInstState &RHS) {
+    return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
+  }
+};
+
+struct EstimatedUnrollCost {
+  /// The estimated cost after unrolling.
+  unsigned UnrolledCost;
+
+  /// The estimated dynamic cost of executing the instructions in the
+  /// rolled form.
+  unsigned RolledDynamicCost;
+};
+
+} // namespace
+
 class UnrolledInstAnalyzer : private InstVisitor<UnrolledInstAnalyzer, bool> {
   typedef InstVisitor<UnrolledInstAnalyzer, bool> Base;
   friend class InstVisitor<UnrolledInstAnalyzer, bool>;
@@ -45,16 +96,27 @@
   };
 
 public:
-  UnrolledInstAnalyzer(unsigned Iteration,
-                       DenseMap<Value *, Constant *> &SimplifiedValues,
-                       ScalarEvolution &SE, const Loop *L)
-      : SimplifiedValues(SimplifiedValues), SE(SE), L(L) {
-      IterationNumber = SE.getConstant(APInt(64, Iteration));
+  UnrolledInstAnalyzer(
+      unsigned Iteration, DenseMap<Value *, Constant *> &SimplifiedValues,
+      const SmallPtrSetImpl<const Value *> &EphValues,
+      DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> &InstCostMap,
+      unsigned &RolledDynamicCost, unsigned &UnrolledCost,
+      unsigned MaxUnrolledLoopSize, ScalarEvolution &SE,
+      const TargetTransformInfo &TTI, const Loop *L)
+      : SimplifiedValues(SimplifiedValues), EphValues(EphValues),
+        InstCostMap(InstCostMap), RolledDynamicCost(RolledDynamicCost),
+        UnrolledCost(UnrolledCost), MaxUnrolledLoopSize(MaxUnrolledLoopSize),
+        SE(SE), TTI(TTI), L(L) {
+    IterationNumber = SE.getConstant(APInt(64, Iteration));
   }
 
   // Allow access to the initial visit method.
   using Base::visit;
 
+  bool visitBB(BasicBlock &B);
+
+  void AddCostRecursively(Instruction &RootI, int Iteration);
+
 private:
   /// A cache of pointer bases and constant-folded offsets corresponding
   /// to GEP (or derived from GEP) instructions.
@@ -77,8 +139,15 @@
   /// constants after complete unrolling, and account for likely simplifications
   /// post-unrolling.
   DenseMap<Value *, Constant *> &SimplifiedValues;
+  const SmallPtrSetImpl<const Value *> &EphValues;
+  DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> &InstCostMap;
+
+  unsigned &RolledDynamicCost;
+  unsigned &UnrolledCost;
+  unsigned MaxUnrolledLoopSize;
 
   ScalarEvolution &SE;
+  const TargetTransformInfo &TTI;
   const Loop *L;
 
   bool simplifyInstWithSCEV(Instruction *I);
diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
--- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -16,6 +16,94 @@
 
 using namespace llvm;
 
+#define DEBUG_TYPE "loop-unroll"
+
+// Helper function to accumulate cost for instructions in the loop.
+void UnrolledInstAnalyzer::AddCostRecursively(Instruction &RootI,
+                                              int Iteration) {
+  // A small worklist used to accumulate cost of instructions from each
+  // observable and reached root in the loop.
+  SmallVector<Instruction *, 16> CostWorklist;
+
+  // PHI-used worklist used between iterations while accumulating cost.
+  SmallVector<Instruction *, 4> PHIUsedList;
+
+  assert(Iteration >= 0 && "Cannot have a negative iteration!");
+  assert(CostWorklist.empty() && "Must start with an empty cost list");
+  assert(PHIUsedList.empty() && "Must start with an empty phi used list");
+  CostWorklist.push_back(&RootI);
+  for (;; --Iteration) {
+    do {
+      Instruction *I = CostWorklist.pop_back_val();
+
+      // InstCostMap only uses I and Iteration as a key, the other two values
+      // don't matter here.
+      auto CostIter = InstCostMap.find({I, Iteration, 0, 0});
+      if (CostIter == InstCostMap.end())
+        // If an input to a PHI node comes from a dead path through the loop
+        // we may have no cost data for it here. What that actually means is
+        // that it is free.
+        continue;
+      auto &Cost = *CostIter;
+      if (Cost.IsCounted)
+        // Already counted this instruction.
+        continue;
+
+      // Mark that we are counting the cost of this instruction now.
+      Cost.IsCounted = true;
+
+      // If this is a PHI node in the loop header, just add it to the PHI set.
+      if (auto *PhiI = dyn_cast<PHINode>(I))
+        if (PhiI->getParent() == L->getHeader()) {
+          assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
+                                "inherently simplify during unrolling.");
+          if (Iteration == 0)
+            continue;
+
+          // Push the incoming value from the backedge into the PHI used list
+          // if it is an in-loop instruction. We'll use this to populate the
+          // cost worklist for the next iteration (as we count backwards).
+          if (auto *OpI = dyn_cast<Instruction>(
+                  PhiI->getIncomingValueForBlock(L->getLoopLatch())))
+            if (L->contains(OpI))
+              PHIUsedList.push_back(OpI);
+          continue;
+        }
+
+      // First accumulate the cost of this instruction.
+      if (!Cost.IsFree) {
+        UnrolledCost += TTI.getUserCost(I);
+        LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
+                          << Iteration << "): ");
+        LLVM_DEBUG(I->dump());
+      }
+
+      // We must count the cost of every operand which is not free,
+      // recursively. If we reach a loop PHI node, simply add it to the set
+      // to be considered on the next iteration (backwards!).
+      for (Value *Op : I->operands()) {
+        // Check whether this operand is free due to being a constant or
+        // outside the loop.
+        auto *OpI = dyn_cast<Instruction>(Op);
+        if (!OpI || !L->contains(OpI))
+          continue;
+
+        // Otherwise accumulate its cost.
+        CostWorklist.push_back(OpI);
+      }
+    } while (!CostWorklist.empty());
+
+    if (PHIUsedList.empty())
+      // We've exhausted the search.
+      break;
+
+    assert(Iteration > 0 &&
+           "Cannot track PHI-used values past the first iteration!");
+    CostWorklist.append(PHIUsedList.begin(), PHIUsedList.end());
+    PHIUsedList.clear();
+  }
+};
+
 /// Try to simplify instruction \param I using its SCEV expression.
 ///
 /// The idea is that some AddRec expressions become constants, which then
@@ -212,3 +300,57 @@
   // The loop induction PHI nodes are definitionally free.
   return PN.getParent() == L->getHeader();
 }
+
+bool UnrolledInstAnalyzer::visitBB(BasicBlock &BB) {
+  int Iteration =
+      cast<SCEVConstant>(IterationNumber)->getValue()->getZExtValue();
+  for (Instruction &I : BB) {
+    // These won't get into the final code - don't even try calculating the
+    // cost for them.
+    if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
+      continue;
+
+    // Track this instruction's expected baseline cost when executing the
+    // rolled loop form.
+    RolledDynamicCost += TTI.getUserCost(&I);
+
+    // Visit the instruction to analyze its loop cost after unrolling,
+    // and if the visitor returns true, mark the instruction as free after
+    // unrolling and continue.
+    bool IsFree = visit(I);
+    bool Inserted = InstCostMap
+                        .insert({&I, (int)Iteration, (unsigned)IsFree,
+                                 /*IsCounted*/ false})
+                        .second;
+    (void)Inserted;
+    assert(Inserted && "Cannot have a state for an unvisited instruction!");
+
+    if (IsFree)
+      continue;
+
+    // Can't properly model a cost of a call.
+    // FIXME: With a proper cost model we should be able to do it.
+    if (auto *CI = dyn_cast<CallInst>(&I)) {
+      const Function *Callee = CI->getCalledFunction();
+      if (!Callee || TTI.isLoweredToCall(Callee)) {
+        LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
+        return false;
+      }
+    }
+
+    // If the instruction might have a side-effect recursively account for
+    // the cost of it and all the instructions leading up to it.
+    if (I.mayHaveSideEffects())
+      AddCostRecursively(I, Iteration);
+
+    // If unrolled body turns out to be too big, bail out.
+    if (UnrolledCost > MaxUnrolledLoopSize) {
+      LLVM_DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
+                        << "  UnrolledCost: " << UnrolledCost
+                        << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
+                        << "\n");
+      return false;
+    }
+  }
+  return true;
+}
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -257,51 +257,6 @@
 
 namespace {
 
-/// A struct to densely store the state of an instruction after unrolling at
-/// each iteration.
-///
-/// This is designed to work like a tuple of <Instruction *, int> for the
-/// purposes of hashing and lookup, but to be able to associate two boolean
-/// states with each key.
-struct UnrolledInstState {
-  Instruction *I;
-  int Iteration : 30;
-  unsigned IsFree : 1;
-  unsigned IsCounted : 1;
-};
-
-/// Hashing and equality testing for a set of the instruction states.
-struct UnrolledInstStateKeyInfo {
-  using PtrInfo = DenseMapInfo<Instruction *>;
-  using PairInfo = DenseMapInfo<std::pair<Instruction *, int>>;
-
-  static inline UnrolledInstState getEmptyKey() {
-    return {PtrInfo::getEmptyKey(), 0, 0, 0};
-  }
-
-  static inline UnrolledInstState getTombstoneKey() {
-    return {PtrInfo::getTombstoneKey(), 0, 0, 0};
-  }
-
-  static inline unsigned getHashValue(const UnrolledInstState &S) {
-    return PairInfo::getHashValue({S.I, S.Iteration});
-  }
-
-  static inline bool isEqual(const UnrolledInstState &LHS,
-                             const UnrolledInstState &RHS) {
-    return PairInfo::isEqual({LHS.I, LHS.Iteration}, {RHS.I, RHS.Iteration});
-  }
-};
-
-struct EstimatedUnrollCost {
-  /// The estimated cost after unrolling.
-  unsigned UnrolledCost;
-
-  /// The estimated dynamic cost of executing the instructions in the
-  /// rolled form.
-  unsigned RolledDynamicCost;
-};
-
 } // end anonymous namespace
 
 /// Figure out if the loop is worth full unrolling.
@@ -487,7 +442,9 @@
     while (!SimplifiedInputValues.empty())
       SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
 
-    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
+    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, EphValues,
+                                  InstCostMap, RolledDynamicCost, UnrolledCost,
+                                  MaxUnrolledLoopSize, SE, TTI, L);
 
     BBWorklist.clear();
     BBWorklist.insert(L->getHeader());
@@ -498,53 +455,8 @@
       // Visit all instructions in the given basic block and try to simplify
       // it.  We don't change the actual IR, just count optimization
       // opportunities.
-      for (Instruction &I : *BB) {
-        // These won't get into the final code - don't even try calculating the
-        // cost for them.
-        if (isa<DbgInfoIntrinsic>(I) || EphValues.count(&I))
-          continue;
-
-        // Track this instruction's expected baseline cost when executing the
-        // rolled loop form.
-        RolledDynamicCost += TTI.getUserCost(&I);
-
-        // Visit the instruction to analyze its loop cost after unrolling,
-        // and if the visitor returns true, mark the instruction as free after
-        // unrolling and continue.
-        bool IsFree = Analyzer.visit(I);
-        bool Inserted = InstCostMap.insert({&I, (int)Iteration,
-                                           (unsigned)IsFree,
-                                           /*IsCounted*/ false}).second;
-        (void)Inserted;
-        assert(Inserted && "Cannot have a state for an unvisited instruction!");
-
-        if (IsFree)
-          continue;
-
-        // Can't properly model a cost of a call.
-        // FIXME: With a proper cost model we should be able to do it.
-        if (auto *CI = dyn_cast<CallInst>(&I)) {
-          const Function *Callee = CI->getCalledFunction();
-          if (!Callee || TTI.isLoweredToCall(Callee)) {
-            LLVM_DEBUG(dbgs() << "Can't analyze cost of loop with call\n");
-            return None;
-          }
-        }
-
-        // If the instruction might have a side-effect recursively account for
-        // the cost of it and all the instructions leading up to it.
-        if (I.mayHaveSideEffects())
-          AddCostRecursively(I, Iteration);
-
-        // If unrolled body turns out to be too big, bail out.
-        if (UnrolledCost > MaxUnrolledLoopSize) {
-          LLVM_DEBUG(dbgs() << "  Exceeded threshold.. exiting.\n"
-                            << "  UnrolledCost: " << UnrolledCost
-                            << ", MaxUnrolledLoopSize: " << MaxUnrolledLoopSize
-                            << "\n");
-          return None;
-        }
-      }
+      if (!Analyzer.visitBB(*BB))
+        return None;
 
       Instruction *TI = BB->getTerminator();
 
@@ -588,7 +500,7 @@
           BBWorklist.insert(Succ);
         else
           ExitWorklist.insert({BB, Succ});
-      AddCostRecursively(*TI, Iteration);
+      Analyzer.AddCostRecursively(*TI, Iteration);
     }
 
     // If we found no optimization opportunities on the first iteration, we
diff --git a/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp b/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
--- a/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
+++ b/llvm/unittests/Analysis/UnrollAnalyzerTest.cpp
@@ -26,6 +26,8 @@
   bool runOnFunction(Function &F) override {
     LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
     ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+    const TargetTransformInfo &TTI =
+        getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 
     Function::iterator FI = F.begin();
     FI++; // First basic block is entry - skip it.
@@ -33,11 +35,16 @@
     Loop *L = LI->getLoopFor(Header);
     BasicBlock *Exiting = L->getExitingBlock();
 
+    DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
+    unsigned UnrolledCost = 0;
+    unsigned RolledDynamicCost = 0;
+
+    SmallPtrSet<const Value *, 32> EphValues;
     SimplifiedValuesVector.clear();
     TripCount = SE->getSmallConstantTripCount(L, Exiting);
     for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) {
       DenseMap<Value *, Constant *> SimplifiedValues;
-      UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L);
+      UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, EphValues, InstCostMap, RolledDynamicCost, UnrolledCost, 0,*SE, TTI, L);
       for (auto *BB : L->getBlocks())
         for (Instruction &I : *BB)
           Analyzer.visit(I);
@@ -46,6 +53,7 @@
     return false;
   }
   void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<TargetTransformInfoWrapperPass>();
     AU.addRequired<DominatorTreeWrapperPass>();
     AU.addRequired<LoopInfoWrapperPass>();
     AU.addRequired<ScalarEvolutionWrapperPass>();
@@ -322,6 +330,7 @@
 
 INITIALIZE_PASS_BEGIN(UnrollAnalyzerTest, "unrollanalyzertestpass",
                       "unrollanalyzertestpass", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)