Index: include/llvm/Analysis/LoopUnrollAnalyzer.h
===================================================================
--- include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -48,8 +48,9 @@
 public:
   UnrolledInstAnalyzer(unsigned Iteration,
                        DenseMap<Value *, Constant *> &SimplifiedValues,
-                       ScalarEvolution &SE, const Loop *L)
-      : SimplifiedValues(SimplifiedValues), SE(SE), L(L) {
+                       ScalarEvolution &SE, const Loop *L, bool CompleteUnroll)
+      : SimplifiedValues(SimplifiedValues), SE(SE),
+        L(L), CompleteUnroll(CompleteUnroll) {
       IterationNumber = SE.getConstant(APInt(64, Iteration));
   }
 
@@ -81,6 +82,7 @@
 
   ScalarEvolution &SE;
   const Loop *L;
+  bool CompleteUnroll;
 
   bool simplifyInstWithSCEV(Instruction *I);
 
Index: lib/Analysis/LoopUnrollAnalyzer.cpp
===================================================================
--- lib/Analysis/LoopUnrollAnalyzer.cpp
+++ lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -35,6 +35,8 @@
     SimplifiedValues[I] = SC->getValue();
     return true;
   }
+  if (!CompleteUnroll)
+    return false;
 
   auto *AR = dyn_cast<SCEVAddRecExpr>(S);
   if (!AR || AR->getLoop() != L)
@@ -211,6 +213,10 @@
   if (Base::visitPHINode(PN))
     return true;
 
+  // Consider PHI is foldable only after complete unroll.
+  if (!CompleteUnroll)
+    return false;
+
   // The loop induction PHI nodes are definitionally free.
   return PN.getParent() == L->getHeader();
 }
Index: lib/Transforms/Scalar/LoopUnrollPass.cpp
===================================================================
--- lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -250,7 +250,7 @@
 static Optional<EstimatedUnrollCost>
 analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
                       ScalarEvolution &SE, const TargetTransformInfo &TTI,
-                      int MaxUnrolledLoopSize) {
+                      int MaxUnrolledLoopSize, bool CompleteUnroll = true) {
   // We want to be able to scale offsets by the trip count and add more offsets
   // to them without checking for overflows, and we already don't want to
   // analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -325,8 +325,9 @@
         // If this is a PHI node in the loop header, just add it to the PHI set.
         if (auto *PhiI = dyn_cast<PHINode>(I))
           if (PhiI->getParent() == L->getHeader()) {
-            assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
-                                  "inherently simplify during unrolling.");
+            if (CompleteUnroll)
+              assert(Cost.IsFree && "Loop PHIs shouldn't be evaluated as they "
+                                    "inherently simplify during unrolling.");
             if (Iteration == 0)
               continue;
 
@@ -402,21 +403,49 @@
           PHI->getNumIncomingValues() == 2 &&
           "Must have an incoming value only for the preheader and the latch.");
 
-      Value *V = PHI->getIncomingValueForBlock(
-          Iteration == 0 ? L->getLoopPreheader() : L->getLoopLatch());
-      Constant *C = dyn_cast<Constant>(V);
-      if (Iteration != 0 && !C)
-        C = SimplifiedValues.lookup(V);
-      if (C)
-        SimplifiedInputValues.push_back({PHI, C});
+      Value *VP = PHI->getIncomingValueForBlock(L->getLoopPreheader());
+      Value *VL = PHI->getIncomingValueForBlock(L->getLoopLatch());
+      if (CompleteUnroll) {
+        Value *V = Iteration == 0 ? VP : VL;
+        Constant *C = dyn_cast<Constant>(V);
+        if (Iteration != 0 && !C)
+          C = SimplifiedValues.lookup(V);
+        if (C)
+          SimplifiedInputValues.push_back({PHI, C});
+      }
+      // If incoming value for PHI is another PHI, that means we need to
+      // store previous value.
+      Instruction *IL = dyn_cast<Instruction>(VL);
+      if (Iteration != 0 && isa<PHINode>(VL) && IL && L->contains(IL))
+        RolledDynamicCost++;
+      // Simplify recurent XOR:
+      // I = PHI [VP, VL]
+      // VL = I ^ X
+      //
+      // To:
+      // I1 = VP
+      // VL1 = VP ^ X
+      // I2 = VP ^ X
+      // VL2 = VP
+      if ((TripCount & 1) == 0) {
+        if (IL && IL->getOpcode() == Instruction::Xor &&
+            IL->getOperand(0) == PHI && L->isLoopInvariant(IL->getOperand(1))) {
+          Constant *CP = dyn_cast<Constant>(VP);
+          if (Iteration & 1)
+            CP = SimplifiedValues.lookup(VL);
+          if (CP)
+            SimplifiedInputValues.push_back({PHI, CP});
+        }
+      }
     }
 
     // Now clear and re-populate the map for the next iteration.
     SimplifiedValues.clear();
     while (!SimplifiedInputValues.empty())
       SimplifiedValues.insert(SimplifiedInputValues.pop_back_val());
+    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues,
+                                  SE, L, CompleteUnroll);
 
-    UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, SE, L);
 
     BBWorklist.clear();
     BBWorklist.insert(L->getHeader());
@@ -515,7 +544,7 @@
 
     // If we found no optimization opportunities on the first iteration, we
     // won't find them on later ones too.
-    if (UnrolledCost == RolledDynamicCost) {
+    if (CompleteUnroll && UnrolledCost == RolledDynamicCost) {
       DEBUG(dbgs() << "  No opportunities found.. exiting.\n"
                    << "  UnrolledCost: " << UnrolledCost << "\n");
       return None;
@@ -532,6 +561,9 @@
         break;
 
       Value *Op = PN->getIncomingValueForBlock(ExitingBB);
+      if (auto *OpPN = dyn_cast<PHINode>(Op))
+        if (OpPN->getParent() == L->getHeader())
+          Op = OpPN->getIncomingValueForBlock(L->getLoopLatch());
       if (auto *OpI = dyn_cast<Instruction>(Op))
         if (L->contains(OpI))
           AddCostRecursively(*OpI, TripCount - 1);
@@ -929,6 +961,24 @@
           << NV("UnrollCount", UP.Count) << " time(s).");
   }
 
+  // Estimate if Force unroll could be profitable.
+  // Currently try only 2 times unroll.
+  // Do it only for innermost loops with unknown trip count.
+  // For know trip count Runtime unroll is more profitable.
+  if (!ExplicitUnroll &&
+      isa<SCEVCouldNotCompute>(SE->getBackedgeTakenCount(L))) {
+    Optional<EstimatedUnrollCost> Cost =
+        analyzeLoopUnrollCost(L, 2, DT, *SE, TTI,
+                              UP.PartialThreshold, false);
+    if (Cost && Cost->UnrolledCost < Cost->RolledDynamicCost) {
+      UP.Force = true;
+      UP.Count = 2;
+      // Unroll uncountable loops only once.
+      DEBUG(dbgs() << "  unrolling with count: " << UP.Count << "\n");
+      return true;
+    }
+  }
+
   if (UP.Count > UP.MaxCount)
     UP.Count = UP.MaxCount;
   DEBUG(dbgs() << "  partially unrolling with count: " << UP.Count << "\n");
Index: test/Transforms/LoopUnroll/unroll-force.ll
===================================================================
--- test/Transforms/LoopUnroll/unroll-force.ll
+++ test/Transforms/LoopUnroll/unroll-force.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -S -unroll-runtime -loop-unroll | FileCheck %s
+; The test checks unroll for uncountable loop, when previous value is stored.
+
+; CHECK: loop.body.if:
+; CHECK: loop.body.if.1:
+
+%struct.list = type { %struct.list*, i32 }
+
+@head = global %struct.list { %struct.list* null, i32 1 }, align 8
+
+; Function Attrs: norecurse nounwind uwtable
+define i32 @main(i32 %argc, i8** nocapture readnone %argv) {
+entry:
+  %0 = load i32, i32* getelementptr inbounds (%struct.list, %struct.list* @head, i64 0, i32 1), align 8
+  br label %loop.body
+
+loop.body:                                          ; preds = %entry, %loop.body.if
+  %prev = phi %struct.list* [ @head, %entry ], [ %cur, %loop.body.if ]
+  %cur = phi %struct.list* [ @head, %entry ], [ %2, %loop.body.if ]
+  %1 = phi i32 [ %0, %entry ], [ %3, %loop.body.if ]
+  %next.cur = getelementptr inbounds %struct.list, %struct.list* %cur, i64 0, i32 0
+  %2 = load %struct.list*, %struct.list** %next.cur, align 8
+  %tobool = icmp eq %struct.list* %2, null
+  br i1 %tobool, label %if.else, label %loop.body.if
+
+loop.body.if:                       ; preds = %loop.body
+  %val = getelementptr inbounds %struct.list, %struct.list* %cur, i64 0, i32 1
+  %3 = load i32, i32* %val, align 8
+  %cmp = icmp sgt i32 %3, %0
+  br i1 %cmp, label %loopexit, label %loop.body
+
+if.else:                                          ; preds = %if.then
+  %next.prev = getelementptr inbounds %struct.list, %struct.list* %prev, i64 0, i32 0
+  store %struct.list* null, %struct.list** %next.prev, align 8
+  br label %end
+
+loopexit:                                 ; preds = %loop.body.if
+  br label %end
+
+end:                                          ; preds = %loopexit, %if.else
+  %4 = phi i32 [ %1, %if.else ], [ %3, %loopexit ]
+  ret i32 %4
+}
+
+; The test checks unroll for uncountable loop, when XOR is simplified.
+
+; CHECK: while.body:
+; CHECK: while.body.1:
+
+; Function Attrs: norecurse nounwind uwtable
+define void @xor(i32* nocapture %a) local_unnamed_addr #0 {
+entry:
+  %0 = load i32, i32* %a, align 4
+  %entry.cmp = icmp eq i32 %0, -1
+  br i1 %entry.cmp, label %while.end, label %while.body
+
+while.body:                                       ; preds = %entry, %while.body
+  %1 = phi i32 [ %2, %while.body ], [ %0, %entry ]
+  %arrayidx12 = phi i32* [ %arrayidx, %while.body ], [ %a, %entry ]
+  %s = phi i32 [ %xor, %while.body ], [ 0, %entry ]
+  %i = phi i32 [ %inc, %while.body ], [ undef, %entry ]
+  %add = add nsw i32 %1, %s
+  store i32 %add, i32* %arrayidx12, align 4
+  %xor = xor i32 %s, 1
+  %inc = add nsw i32 %i, 1
+  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %inc
+  %2 = load i32, i32* %arrayidx, align 4
+  %cmp = icmp eq i32 %2, -1
+  br i1 %cmp, label %while.end, label %while.body
+
+while.end:                                        ; preds = %while.body, %entry
+  ret void
+}
+
Index: unittests/Analysis/UnrollAnalyzer.cpp
===================================================================
--- unittests/Analysis/UnrollAnalyzer.cpp
+++ unittests/Analysis/UnrollAnalyzer.cpp
@@ -38,7 +38,7 @@
     TripCount = SE->getSmallConstantTripCount(L, Exiting);
     for (unsigned Iteration = 0; Iteration < TripCount; Iteration++) {
       DenseMap<Value *, Constant *> SimplifiedValues;
-      UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L);
+      UnrolledInstAnalyzer Analyzer(Iteration, SimplifiedValues, *SE, L, true);
       for (auto *BB : L->getBlocks())
         for (Instruction &I : *BB)
           Analyzer.visit(I);