Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -687,6 +687,64 @@
   return false;
 }
 
+// Returns number of instructions that could be optimized out if a loop
+// get unrolled. Initially based only on the following structure.
+//
+// loop:
+//   R.prev = phi[R, ...
+//   R = phi [R.next, ...
+//   2 Uses of R or Rnext or R.prev
+//   R.next calculation depends on R
+// loopexit:
+//   1 Use of R or Rnext or R.prev
+//
+// Without unroll we will need a move to store R.prev value.
+// With unroll on 2 or more R.prev and R just go to different
+// registers.
+static unsigned ForceUnrollProfitability(Loop *L) {
+  BasicBlock *Latch = L->getLoopLatch();
+  BasicBlock *Header = L->getHeader();
+  // By default no instructions are optimized out.
+  unsigned OptInsns = 0;
+  if (!Latch || !Header)
+    return OptInsns;
+  for (Instruction &BBI : *Header) {
+    PHINode *PN = dyn_cast<PHINode>(&BBI);
+    // Exit when we've passed all PHI nodes.
+    if (!PN)
+      break;
+    // To simplify calculations we do not check that PN represents a
+    // recurrency (which could be a very long chain) potentially hitting
+    // some extra cases.
+    PHINode *PrevPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(Latch));
+    if (!PrevPN)
+      continue;
+    SmallVector<PHINode *, 2> PNs;
+    PNs.push_back(PN);
+    if (PN != PrevPN)
+      PNs.push_back(PrevPN);
+    unsigned PNLoopUses = 0, PNUses = 0;
+    for (PHINode *Phi : PNs) {
+      if (PNLoopUses >= 2 && PNUses >= 1)
+        break;
+      for (User *U : Phi->users()) {
+        Instruction *I = dyn_cast<Instruction>(U);
+        if (!I)
+          continue;
+        if (L->contains(I))
+          PNLoopUses++;
+        else
+          PNUses++;
+        if (PNLoopUses >= 2 && PNUses >= 1) {
+          OptInsns++;
+          break;
+        }
+      }
+    }
+  }
+  return OptInsns;
+}
+
 // Returns true if unroll count was set explicitly.
 // Calculates unroll count and writes it to UP.Count.
 static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
@@ -886,6 +944,14 @@
               " time(s).");
   }
 
+  // Check if forced unroll is profitable.
+  if (!ExplicitUnroll) {
+    unsigned NewBEInsns = ForceUnrollProfitability(L);
+    if (NewBEInsns &&
+        (LoopSize - NewBEInsns) * UP.Count + NewBEInsns < UP.Threshold)
+      UP.Force = true;
+  }
+
   if (UP.Count > UP.MaxCount)
     UP.Count = UP.MaxCount;
   DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count << "\n");
Index: test/Transforms/LoopUnroll/unroll-force.ll
===================================================================
--- test/Transforms/LoopUnroll/unroll-force.ll
+++ test/Transforms/LoopUnroll/unroll-force.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -S -unroll-runtime -loop-unroll -unroll-max-count=2 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; The test checks unroll for uncountable loop.
+
+; CHECK: if.then:
+; CHECK: if.then.1:
+
+%struct.list = type { %struct.list*, i32 }
+
+@head = global %struct.list { %struct.list* null, i32 1 }, align 8
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.list, %struct.list* @head, i64 0, i32 1), align 8
+  br label %if.then
+
+if.then:                                          ; preds = %entry, %if.then.for.cond_crit_edge
+  %prev.024 = phi %struct.list* [ @head, %entry ], [ %cur.023, %if.then.for.cond_crit_edge ]
+  %cur.023 = phi %struct.list* [ @head, %entry ], [ %2, %if.then.for.cond_crit_edge ]
+  %1 = phi i32 [ %0, %entry ], [ %.pre, %if.then.for.cond_crit_edge ]
+  %next = getelementptr inbounds %struct.list, %struct.list* %cur.023, i64 0, i32 0
+  %2 = load %struct.list*, %struct.list** %next, align 8
+  %tobool = icmp eq %struct.list* %2, null
+  br i1 %tobool, label %if.else, label %if.then.for.cond_crit_edge
+
+if.then.for.cond_crit_edge:                       ; preds = %if.then
+  %val.phi.trans.insert = getelementptr inbounds %struct.list, %struct.list* %cur.023, i64 0, i32 1
+  %.pre = load i32, i32* %val.phi.trans.insert, align 8
+  %cmp = icmp sgt i32 %.pre, %0
+  br i1 %cmp, label %for.end.loopexit, label %if.then
+
+if.else:                                          ; preds = %if.then
+  %next3 = getelementptr inbounds %struct.list, %struct.list* %prev.024, i64 0, i32 0
+  store %struct.list* null, %struct.list** %next3, align 8
+  br label %for.end
+
+for.end.loopexit:                                 ; preds = %if.then.for.cond_crit_edge
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %if.else
+  %3 = phi i32 [ %1, %if.else ], [ %.pre, %for.end.loopexit ]
+  ret i32 %3
+}