Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -739,17 +739,76 @@
 // cheaply estimate cost for full unrolling when we don't want to symbolically
 // evaluate all iterations.
 class UnrollCostEstimator {
+  Loop &TheLoop;
+  const TargetTransformInfo &TTI;
+  ScalarEvolution &SE;
+  // Note: Both "size" fields here are in units of
+  // TTI->getUserCost(..., CodeSize), not instruction counts.
   const unsigned LoopSize;
+  Optional<unsigned> UniformSize;
 
 public:
-  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
+  UnrollCostEstimator(Loop &L, const TargetTransformInfo &TTI,
+                      ScalarEvolution &SE, unsigned LoopSize)
+      : TheLoop(L), TTI(TTI), SE(SE), LoopSize(LoopSize) {}
+
+  bool
+  isUnrolledLoopUnderThreshold(TargetTransformInfo::UnrollingPreferences &UP,
+                               unsigned Threshold) {
+    // Check the cheap form first before falling back to the expensive
+    // SCEV queries. Most loops are small, so the expensive path should
+    // rarely run.
+    if (getUnrolledLoopSize(UP, /*UseUniform=*/false) < Threshold)
+      return true;
+    return getUnrolledLoopSize(UP, /*UseUniform=*/true) < Threshold;
+  }
+
+private:
   // Returns loop size estimation for unrolled loop, given the unrolling
   // configuration specified by UP.
-  uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
-    assert(LoopSize >= UP.BEInsns &&
+  uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP,
+                               bool UseUniform) {
+    unsigned UniformSize = UseUniform ? computeUniformInstCost() : 0;
+    assert(LoopSize >= (UP.BEInsns + UniformSize) &&
            "LoopSize should not be less than BEInsns!");
-    return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+    return (uint64_t)(LoopSize - UP.BEInsns - UniformSize) * UP.Count +
+           UP.BEInsns + UniformSize;
+  }
+
+  // Returns the code-size cost of the instructions in the loop which are
+  // loop invariant according to SCEV; the result is cached.
+  unsigned computeUniformInstCost() {
+    if (UniformSize)
+      return *UniformSize;
+
+    // We want to use SCEV's notion of invariance to handle cases such as:
+    //   loop:
+    //     %a = udiv i32 %arg1, %arg2
+    //     %b = zext i32 %a to i64
+    // (both %a and %b are invariant)
+    // However, computing SCEVs is expensive, so defer that work until
+    // we've found at least one trivially invariant operation: in any case
+    // like the above, at least one instruction (%a here) must have
+    // all-invariant operands.
+    const bool CouldBeInvariant =
+        llvm::any_of(TheLoop.blocks(), [&](BasicBlock *BB) {
+          return llvm::any_of(*BB, [&](Instruction &I) {
+            return TheLoop.hasLoopInvariantOperands(&I);
+          });
+        });
+    if (!CouldBeInvariant) {
+      UniformSize = 0;
+      return *UniformSize;
+    }
+    InstructionCost Size = 0;
+    for (BasicBlock *BB : TheLoop.blocks())
+      for (Instruction &I : *BB)
+        if (SE.isSCEVable(I.getType()) &&
+            SE.isLoopInvariant(SE.getSCEV(&I), &TheLoop))
+          Size += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
+    UniformSize = *Size.getValue();
+    return *UniformSize;
+  }
 };
 
@@ -769,7 +828,7 @@
                                TargetTransformInfo::UnrollingPreferences &UP,
                                TargetTransformInfo::PeelingPreferences &PP,
                                bool &UseUpperBound) {
-  UnrollCostEstimator UCE(*L, LoopSize);
+  UnrollCostEstimator UCE(*L, TTI, SE, LoopSize);
 
   // Check for explicit Count.
   // 1st priority is unroll count set by "unroll-count" option.
@@ -778,7 +837,7 @@
     UP.Count = UnrollCount;
     UP.AllowExpensiveTripCount = true;
     UP.Force = true;
-    if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+    if (UP.AllowRemainder && UCE.isUnrolledLoopUnderThreshold(UP, UP.Threshold))
       return true;
   }
 
@@ -790,13 +849,13 @@
     UP.AllowExpensiveTripCount = true;
     UP.Force = true;
     if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
-        UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+        UCE.isUnrolledLoopUnderThreshold(UP, PragmaUnrollThreshold))
       return true;
   }
   bool PragmaFullUnroll = hasUnrollFullPragma(L);
   if (PragmaFullUnroll && TripCount != 0) {
     UP.Count = TripCount;
-    if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+    if (UCE.isUnrolledLoopUnderThreshold(UP, PragmaUnrollThreshold))
       return false;
   }
 
@@ -845,7 +904,7 @@
   if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
     // When computing the unrolled size, note that BEInsns are not replicated
     // like the rest of the loop body.
-    if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+    if (UCE.isUnrolledLoopUnderThreshold(UP, UP.Threshold)) {
       UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
       TripCount = FullUnrollTripCount;
       TripMultiple = UP.UpperBound ? 1 : TripMultiple;
@@ -892,7 +951,7 @@
   UP.Count = TripCount;
   if (UP.PartialThreshold != NoThreshold) {
     // Reduce unroll count to be modulo of TripCount for partial unrolling.
-    if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+    if (!UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
       UP.Count =
           (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
           (LoopSize - UP.BEInsns);
@@ -907,7 +966,7 @@
       // remainder loop is allowed.
       UP.Count = UP.DefaultUnrollRuntimeCount;
       while (UP.Count != 0 &&
-             UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+             !UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
         UP.Count >>= 1;
     }
     if (UP.Count < 2) {
@@ -991,7 +1050,7 @@
   // Reduce unroll count to be the largest power-of-two factor of
   // the original count which satisfies the threshold limit.
   while (UP.Count != 0 &&
-         UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+         !UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
     UP.Count >>= 1;
 
 #ifndef NDEBUG
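
Note (not part of the patch): the arithmetic in getUnrolledLoopSize can be sanity-checked with the standalone sketch below. It is an illustration only; unrolledLoopSize and all the constants are hypothetical, and it assumes, as the patch does, that backedge instructions (BEInsns) and SCEV-invariant instructions are materialized once rather than replicated per unrolled iteration.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Hypothetical standalone restatement of the size model in
// UnrollCostEstimator::getUnrolledLoopSize: BEInsns and UniformSize are
// charged once, everything else once per unrolled iteration.
static uint64_t unrolledLoopSize(unsigned LoopSize, unsigned BEInsns,
                                 unsigned UniformSize, unsigned Count) {
  assert(LoopSize >= BEInsns + UniformSize);
  return (uint64_t)(LoopSize - BEInsns - UniformSize) * Count + BEInsns +
         UniformSize;
}

int main() {
  // A loop of size 12 (code-size units) with 2 backedge instructions and
  // 3 loop-invariant instructions, unrolled 4x:
  //   old model (UniformSize ignored): (12 - 2) * 4 + 2         = 42
  //   new model:                       (12 - 2 - 3) * 4 + 2 + 3 = 33
  printf("old: %llu\n", (unsigned long long)unrolledLoopSize(12, 2, 0, 4));
  printf("new: %llu\n", (unsigned long long)unrolledLoopSize(12, 2, 3, 4));
  return 0;
}

With a partial threshold of, say, 40, the old model (42) would halve the unroll count while the new model (33) keeps Count = 4; that is the behavioral difference the isUnrolledLoopUnderThreshold call sites above pick up.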