Index: llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -739,17 +739,55 @@
 // cheaply estimate cost for full unrolling when we don't want to symbolically
 // evaluate all iterations.
 class UnrollCostEstimator {
+  Loop &TheLoop;
+  const TargetTransformInfo &TTI;
+  ScalarEvolution &SE;
+  // Note: Both "size" fields here are in units of TTI->getUserCost(, CodeSize),
+  // not instruction counts.
   const unsigned LoopSize;
+  Optional<unsigned> UniformSize;
 
 public:
-  UnrollCostEstimator(Loop &L, unsigned LoopSize) : LoopSize(LoopSize) {}
+  UnrollCostEstimator(Loop &L, const TargetTransformInfo &TTI,
+                      ScalarEvolution &SE, unsigned LoopSize)
+    : TheLoop(L), TTI(TTI), SE(SE), LoopSize(LoopSize) {}
+
+
+  bool
+  isUnrolledLoopUnderThreshold(TargetTransformInfo::UnrollingPreferences &UP,
+                               unsigned Threshold) {
+    // Check the cheap form first before falling back to the expensive
+    // SCEV queries.  Most loops are small, so the expensive path should
+    // rarely run.
+    if (getUnrolledLoopSize(UP, false) < Threshold)
+      return true;
+    return getUnrolledLoopSize(UP, true) < Threshold;
+  }
+
+private:
 
   // Returns loop size estimation for unrolled loop, given the unrolling
   // configuration specified by UP.
-  uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP) {
-    assert(LoopSize >= UP.BEInsns &&
+  uint64_t getUnrolledLoopSize(TargetTransformInfo::UnrollingPreferences &UP,
+                               bool UseUniform) {
+    unsigned UniformSize = UseUniform ? computeUniformInstCost() : 0;
+    assert(LoopSize >= (UP.BEInsns + UniformSize) &&
            "LoopSize should not be less than BEInsns!");
-    return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
+    return (uint64_t)(LoopSize - UP.BEInsns - UniformSize) * UP.Count +
+      UP.BEInsns + UniformSize;
+  }
+
+  unsigned computeUniformInstCost() {
+    if (!UniformSize) {
+      InstructionCost Size = 0;
+      for (BasicBlock *BB : TheLoop.blocks())
+        for (Instruction &I : *BB)
+          if (SE.isSCEVable(I.getType()) &&
+              SE.isLoopInvariant(SE.getSCEV(&I), &TheLoop))
+            Size += TTI.getUserCost(&I, TargetTransformInfo::TCK_CodeSize);
+      UniformSize = *Size.getValue();
+    }
+    return *UniformSize;
   }
 };
 
@@ -769,7 +807,7 @@
     TargetTransformInfo::UnrollingPreferences &UP,
     TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound) {
 
-  UnrollCostEstimator UCE(*L, LoopSize);
+  UnrollCostEstimator UCE(*L, TTI, SE, LoopSize);
 
   // Check for explicit Count.
   // 1st priority is unroll count set by "unroll-count" option.
@@ -778,7 +816,7 @@
     UP.Count = UnrollCount;
     UP.AllowExpensiveTripCount = true;
     UP.Force = true;
-    if (UP.AllowRemainder && UCE.getUnrolledLoopSize(UP) < UP.Threshold)
+    if (UP.AllowRemainder && UCE.isUnrolledLoopUnderThreshold(UP, UP.Threshold))
       return true;
   }
 
@@ -790,13 +828,13 @@
     UP.AllowExpensiveTripCount = true;
     UP.Force = true;
     if ((UP.AllowRemainder || (TripMultiple % PragmaCount == 0)) &&
-        UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+        UCE.isUnrolledLoopUnderThreshold(UP, PragmaUnrollThreshold))
       return true;
   }
   bool PragmaFullUnroll = hasUnrollFullPragma(L);
   if (PragmaFullUnroll && TripCount != 0) {
     UP.Count = TripCount;
-    if (UCE.getUnrolledLoopSize(UP) < PragmaUnrollThreshold)
+    if (UCE.isUnrolledLoopUnderThreshold(UP, PragmaUnrollThreshold))
       return false;
   }
 
@@ -845,7 +883,7 @@
   if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
     // When computing the unrolled size, note that BEInsns are not replicated
     // like the rest of the loop body.
-    if (UCE.getUnrolledLoopSize(UP) < UP.Threshold) {
+    if (UCE.isUnrolledLoopUnderThreshold(UP, UP.Threshold)) {
       UseUpperBound = (FullUnrollMaxTripCount == FullUnrollTripCount);
       TripCount = FullUnrollTripCount;
       TripMultiple = UP.UpperBound ? 1 : TripMultiple;
@@ -892,7 +930,7 @@
       UP.Count = TripCount;
     if (UP.PartialThreshold != NoThreshold) {
       // Reduce unroll count to be modulo of TripCount for partial unrolling.
-      if (UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+      if (!UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
         UP.Count =
             (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
             (LoopSize - UP.BEInsns);
@@ -907,7 +945,7 @@
         // remainder loop is allowed.
         UP.Count = UP.DefaultUnrollRuntimeCount;
         while (UP.Count != 0 &&
-               UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+               !UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
           UP.Count >>= 1;
       }
       if (UP.Count < 2) {
@@ -991,7 +1029,7 @@
   // Reduce unroll count to be the largest power-of-two factor of
   // the original count which satisfies the threshold limit.
   while (UP.Count != 0 &&
-         UCE.getUnrolledLoopSize(UP) > UP.PartialThreshold)
+         !UCE.isUnrolledLoopUnderThreshold(UP, UP.PartialThreshold))
     UP.Count >>= 1;
 
 #ifndef NDEBUG
Index: llvm/test/Transforms/LoopUnroll/full-unroll-invariant-2.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopUnroll/full-unroll-invariant-2.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=104 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -unroll-threshold=104 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Check that the course (not symbolic execution based) unroll cost model
+; discounts invariant instructions.  This test file is split from
+; full-unroll-invariant.ll because we need a different threshold value due
+; to an unrelated cost model problem.  The loop below should have a cost of
+; 1, but the handling of BEInsts results in it having a cost of 103.
+
+define i32 @test4(i8 %a) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ZEXT_99:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[ZEXT_99]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %zext = zext i8 %a to i32
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 100
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret i32 %zext
+}