Index: include/llvm/Transforms/Utils/LoopUtils.h
===================================================================
--- include/llvm/Transforms/Utils/LoopUtils.h
+++ include/llvm/Transforms/Utils/LoopUtils.h
@@ -461,6 +461,11 @@
 void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
                              unsigned V = 0);
 
+/// \brief Get a loop's estimated trip count based on branch weight metadata.
+/// Returns 0 when the count is estimated to be 0, or None when a meaningful
+/// estimate cannot be made.
+Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
+
 /// Helper to consistently add the set of standard passes to a loop pass's \c
 /// AnalysisUsage.
 ///
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -102,6 +102,12 @@
     cl::desc("Unrolled size limit for loops with an unroll(full) or "
              "unroll_count pragma."));
 
+static cl::opt<unsigned> FlatLoopTripCountThreshold(
+    "flat-loop-tripcount-threshold", cl::Hidden, cl::init(5),
+    cl::desc("If the runtime tripcount for the loop is lower than the "
+             "threshold, the loop is considered as flat and will be "
+             "less aggressively unrolled."));
+
 /// A magic value for use with the Threshold parameter to indicate
 /// that the loop unroll should be performed regardless of how much
 /// code expansion would result.
@@ -748,6 +754,16 @@
   bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
                         PragmaEnableUnroll || UserUnrollCount;
 
+  // Check if the runtime trip count is too small when profile is available.
+  if (L->getHeader()->getParent()->getEntryCount() && TripCount == 0) {
+    if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
+      if (*ProfileTripCount < FlatLoopTripCountThreshold)
+        return false;
+      else
+        UP.AllowExpensiveTripCount = true;
+    }
+  }
+
   if (ExplicitUnroll && TripCount != 0) {
     // If the loop has an unrolling pragma, we want to be more aggressive with
     // unrolling limits. Set thresholds to at least the PragmaThreshold value
Index: lib/Transforms/Utils/LoopUtils.cpp
===================================================================
--- lib/Transforms/Utils/LoopUtils.cpp
+++ lib/Transforms/Utils/LoopUtils.cpp
@@ -1067,3 +1067,39 @@
   // just a special case of this.)
   return true;
 }
+
+Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
+  // Only loops whose single latch is also the unique exiting block are
+  // supported. getLoopLatch() returns null when there is no unique latch,
+  // and if the exit lived in another block the latch weights would not
+  // describe the backedge-vs-exit split.
+  BasicBlock *Latch = L->getLoopLatch();
+  if (!Latch || L->getExitingBlock() != Latch)
+    return None;
+
+  // Get the branch weights for the loop's backedge.
+  BranchInst *LatchBR = dyn_cast<BranchInst>(Latch->getTerminator());
+  if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+    return None;
+
+  assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+          LatchBR->getSuccessor(1) == L->getHeader()) &&
+         "At least one edge out of the latch must go to the header");
+
+  // The estimate is the number of times the backedge was taken divided by
+  // the number of times the loop was exited. The weights are treated as raw
+  // counts plus one, so one is subtracted from each; a weight of 0 or 1
+  // cannot be adjusted that way and yields None, which also keeps the
+  // divisor non-zero.
+  uint64_t TrueVal, FalseVal;
+  if (!LatchBR->extractProfMetadata(TrueVal, FalseVal) || TrueVal <= 1 ||
+      FalseVal <= 1)
+    return None;
+  TrueVal -= 1;
+  FalseVal -= 1;
+
+  // Divide the count of the backedge by the count of the edge exiting the loop.
+  if (LatchBR->getSuccessor(0) == L->getHeader())
+    return TrueVal / FalseVal;
+  return FalseVal / TrueVal;
+}
Index: test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
===================================================================
--- /dev/null
+++ test/Transforms/LoopUnroll/unroll-heuristics-pgo.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-threshold=40 -unroll-dynamic-cost-savings-discount=0 | FileCheck %s
+
+@known_constant = internal unnamed_addr constant [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
+
+; CHECK-LABEL: @bar_prof
+; CHECK: loop.prol:
+; CHECK: loop:
+; CHECK: %mul = mul
+; CHECK: %mul.1 = mul
+; CHECK: %mul.2 = mul
+; CHECK: %mul.3 = mul
+define i32 @bar_prof(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
+entry:  ; !prof !1 gives the function an entry count; the FileCheck directives above expect a loop.prol block and four copies of %mul
+  br label %loop
+
+loop:  ; single-block loop: this block branches back to itself and to %loop.end
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]  ; induction variable, starts at 0
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ]  ; running sum of the products
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, %c  ; true once %inc equals the runtime bound %c
+  br i1 %exitcond86.i, label %loop.end, label %loop, !prof !2  ; !2: weight 1 on the exit edge, 1000 on the backedge
+
+loop.end:
+  %r.lcssa = phi i32 [ %r, %loop ]
+  ret i32 %r.lcssa
+}
+
+; CHECK-LABEL: @bar_prof_flat
+; CHECK-NOT: loop.prol
+define i32 @bar_prof_flat(i32* noalias nocapture readonly %src, i64 %c) !prof !1 {
+entry:  ; NOTE(review): getLoopEstimatedTripCount appears to return None when either weight is <= 1, so with !2 the flat-loop threshold may never be consulted here; confirm the weights
+  br label %loop
+
+loop:  ; single-block loop: this block branches back to itself and to %loop.end
+  %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]  ; induction variable, starts at 0
+  %r  = phi i32 [ 0, %entry ], [ %add, %loop ]  ; running sum of the products
+  %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+  %src_element = load i32, i32* %arrayidx, align 4
+  %array_const_idx = getelementptr inbounds [9 x i32], [9 x i32]* @known_constant, i64 0, i64 %iv
+  %const_array_element = load i32, i32* %array_const_idx, align 4
+  %mul = mul nsw i32 %src_element, %const_array_element
+  %add = add nsw i32 %mul, %r
+  %inc = add nuw nsw i64 %iv, 1
+  %exitcond86.i = icmp eq i64 %inc, %c  ; true only when %inc equals %c
+  br i1 %exitcond86.i, label %loop, label %loop.end, !prof !2  ; successors swapped versus @bar_prof: the backedge is the true edge (weight 1), the exit the false edge (weight 1000)
+
+loop.end:
+  %r.lcssa = phi i32 [ %r, %loop ]
+  ret i32 %r.lcssa
+}
+
+!1 = !{!"function_entry_count", i64 1}
+!2 = !{!"branch_weights", i32 1, i32 1000}