diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopUnrollAnalyzer.h"
 #include "llvm/Analysis/MemorySSA.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -251,7 +252,35 @@
                  L->dump());
       return false;
     }
-    if (Metrics.NumInsts > MaxHeaderSize)
+
+    // We track the simplification of each instruction in each iteration. We use
+    // this to recursively merge costs into the unrolled cost on-demand so that
+    // we don't count the cost of any dead code. This is essentially a map from
+    // <instruction, int> to <bool, bool>, but stored as a densely packed
+    // struct.
+    DenseSet<UnrolledInstState, UnrolledInstStateKeyInfo> InstCostMap;
+    //
+    // The estimated cost of the unrolled form of the loop. We try to estimate
+    // this by simplifying as much as we can while computing the estimate.
+    unsigned UnrolledCost = 0;
+
+    // We also track the estimated dynamic (that is, actually executed) cost in
+    // the rolled form. This helps identify cases when the savings from
+    // unrolling aren't just exposing dead control flows, but actual reduced
+    // dynamic instructions due to the simplifications which we expect to occur
+    // after unrolling.
+    unsigned RolledDynamicCost = 0;
+
+    DenseMap<Value *, Constant *> SimplifiedValues;
+
+    UnrolledInstAnalyzer Analyzer(0, SimplifiedValues, EphValues, InstCostMap,
+                                  RolledDynamicCost, UnrolledCost,
+                                  MaxHeaderSize, *SE, *TTI, L);
+    if (!Analyzer.visitBB(*OrigHeader))
+      return false;
+
+    Analyzer.AddCostRecursively(*OrigHeader->getTerminator(), 0);
+    if (UnrolledCost > MaxHeaderSize + 1)
       return false;
   }
 
diff --git a/llvm/test/Transforms/LoopRotate/oz-disable.ll b/llvm/test/Transforms/LoopRotate/oz-disable.ll
--- a/llvm/test/Transforms/LoopRotate/oz-disable.ll
+++ b/llvm/test/Transforms/LoopRotate/oz-disable.ll
@@ -2,14 +2,18 @@
 ; RUN: opt < %s -S -Os -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OS
 ; RUN: opt < %s -S -Oz -debug -debug-only=loop-rotate 2>&1 | FileCheck %s -check-prefix=OZ
 
-; Loop should be rotated for -Os but not for -Oz.
-; OS: rotating Loop at depth 1
-; OZ-NOT: rotating Loop at depth 1
-
 @e = global i32 10
 
 declare void @use(i32)
 
+; Loop should be rotated for -Os but not for -Oz, as not all instructions in
+; the header are free.
+
+; OS: LoopRotation: rotating Loop at depth 1 containing: %loop<header><exiting>,%loop.fin<latch>
+; OS-NEXT: LoopRotation: into Loop at depth 1 containing: %loop.fin<header><latch><exiting>
+
+; OZ-NOT: LoopRotation: rotating Loop at depth 1 containing: %loop<header><exiting>,%loop.fin<latch>
+; OZ-NOT: LoopRotation: into Loop at depth 1 containing: %loop.fin<header><latch><exiting>
 define void @test() {
 entry:
   %end = load i32, i32* @e
@@ -28,3 +32,29 @@
 exit:
   ret void
 }
+
+; Loop should be rotated for both -Os and -Oz, as all instructions in the header
+; are free.
+
+; OS-NEXT: LoopRotation: rotating Loop at depth 1 containing: %loop2<header><exiting>,%loop2.fin<latch>
+; OS-NEXT: LoopRotation: into Loop at depth 1 containing: %loop2.fin<header><latch><exiting>
+
+; OZ: LoopRotation: rotating Loop at d
+; OZ-NEXT: LoopRotation: into Loop at depth 1 containing: %loop2.fin<header><latch><exiting>
+define void @test2() {
+entry:
+  br label %loop2
+
+loop2:
+  %n.phi = phi i32 [ %n, %loop2.fin ], [ 0, %entry ]
+  %cond = icmp eq i32 %n.phi, 15
+  br i1 %cond, label %exit, label %loop2.fin
+
+loop2.fin:
+  %n = add i32 %n.phi, 1
+  call void @use(i32 %n)
+  br label %loop2
+
+exit:
+  ret void
+}