Index: lib/Analysis/ScalarEvolutionExpander.cpp
===================================================================
--- lib/Analysis/ScalarEvolutionExpander.cpp
+++ lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1822,6 +1822,47 @@
   return NumElim;
 }
 
+// Return true if Base is encountered while traversing the expression S with
+// visitAll. This function is called by findExistingExpansion.
+static bool IsContainedIn(const SCEV *S, const SCEV *Base) {
+  struct ContainmentVisitor {
+    const SCEV *Target;
+    bool Seen = false;
+
+    explicit ContainmentVisitor(const SCEV *T) : Target(T) {}
+    // Record a match on Target; keep following operands only until then.
+    bool follow(const SCEV *Expr) {
+      Seen = Seen || (Expr == Target);
+      return !Seen;
+    }
+    // Lets the traversal stop as soon as Target has been seen.
+    bool isDone() const { return Seen; }
+  };
+
+  ContainmentVisitor Visitor(Base);
+  visitAll(S, Visitor);
+  return Visitor.Seen;
+}
+
+// Depth-limited recursive search of I and the instructions feeding it for one
+// whose SCEV is exactly S. Returns that instruction, or nullptr if none was
+// found within the limit. This function is called by findExistingExpansion.
+static Instruction *FindSameSCEVInst(ScalarEvolution &SE, const SCEV *S,
+                                     Instruction *I, unsigned depth) {
+  // Non-instruction operands arrive here as nullptr; also limit recursion.
+  if (I == nullptr || depth > 3)
+    return nullptr;
+  // Operands may have types SCEV cannot analyze; getSCEV must not see those.
+  if (SE.isSCEVable(I->getType()) && SE.getSCEV(I) == S)
+    return I;
+
+  // Check every operand one level deeper; the first match wins.
+  for (Value *Operand : I->operands())
+    if (Instruction *Found = FindSameSCEVInst(
+            SE, S, dyn_cast<Instruction>(Operand), depth + 1))
+      return Found;
+  return nullptr;
+}
+
 Value *SCEVExpander::findExistingExpansion(const SCEV *S,
                                            const Instruction *At, Loop *L) {
   using namespace llvm::PatternMatch;
@@ -1833,18 +1874,28 @@
   for (BasicBlock *BB : ExitingBlocks) {
     ICmpInst::Predicate Pred;
     Instruction *LHS, *RHS;
+    Value *RHSV;
     BasicBlock *TrueBB, *FalseBB;
 
     if (!match(BB->getTerminator(),
-               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
-                    TrueBB, FalseBB)))
+               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Value(RHSV)), TrueBB,
+                    FalseBB)))
       continue;
 
-    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
-      return LHS;
-
-    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
-      return RHS;
+    if (isa<Instruction>(RHSV)) {
+      RHS = cast<Instruction>(RHSV);
+      if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
+        return LHS;
+
+      if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
+        return RHS;
+    } else if (isa<ConstantInt>(RHSV)) {
+      if (IsContainedIn(SE.getSCEV(LHS), S)) {
+        LHS = FindSameSCEVInst(SE, S, LHS, 0);
+        if (LHS != nullptr && SE.DT.dominates(LHS, At))
+          return LHS;
+      }
+    }
   }
 
   // There is potential to make this significantly smarter, but this simple
Index: lib/Transforms/Utils/LoopUnrollRuntime.cpp
===================================================================
--- lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -311,9 +311,19 @@
     return false;
 
   BasicBlock *Header = L->getHeader();
+  BasicBlock *PH = L->getLoopPreheader();
+  BasicBlock *Latch = L->getLoopLatch();
+  // It helps to split the original preheader twice, one for the end of the
+  // prolog code and one for a new loop preheader.
+  BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
+  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
+  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
+
   const DataLayout &DL = Header->getModule()->getDataLayout();
   SCEVExpander Expander(*SE, DL, "loop-unroll");
-  if (!AllowExpensiveTripCount && Expander.isHighCostExpansion(TripCountSC, L))
+
+  if (!AllowExpensiveTripCount &&
+      Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR))
     return false;
 
   // We only handle cases when the unroll factor is a power of 2.
@@ -331,14 +341,6 @@
   if (Loop *ParentLoop = L->getParentLoop())
     SE->forgetLoop(ParentLoop);
 
-  BasicBlock *PH = L->getLoopPreheader();
-  BasicBlock *Latch = L->getLoopLatch();
-  // It helps to splits the original preheader twice, one for the end of the
-  // prolog code and one for a new loop preheader
-  BasicBlock *PEnd = SplitEdge(PH, Header, DT, LI);
-  BasicBlock *NewPH = SplitBlock(PEnd, PEnd->getTerminator(), DT, LI);
-  BranchInst *PreHeaderBR = cast<BranchInst>(PH->getTerminator());
-
   // Compute the number of extra iterations required, which is:
   //  extra iterations = run-time trip count % (loop unroll factor + 1)
   Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
Index: test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
===================================================================
--- test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
+++ test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
@@ -24,4 +24,30 @@
   ret i32 0
 }
 
+;; We expect this loop to be unrolled because the IV's step is minus one: in
+;; that case no division has to be generated to compute the trip count.
+
+define i32 @test2(i64* %loc, i64 %conv7) {
+; CHECK-LABEL: @test2(
+; CHECK-LABEL: for.body.prol
+entry:
+  %rem0 = load i64, i64* %loc, align 8, !tbaa !0
+  %div11 = udiv i64 %rem0, %conv7
+  %cmp.i38 = icmp ugt i64 %div11, 1
+  %div12 = select i1 %cmp.i38, i64 %div11, i64 1 ; unsigned max(%div11, 1)
+  br label %for.body
+for.body:
+  %rem1 = phi i64 [ %rem0, %entry ], [ %rem2, %for.body ]
+  %k1 = phi i64 [ %div12, %entry ], [ %dec, %for.body ]
+  %mul1 = mul i64 %rem1, 48271
+  %rem2 = urem i64 %mul1, 2147483647
+  %dec = add i64 %k1, -1 ; IV counts down by one from %div12; exit at zero
+  %cmp = icmp eq i64 %dec, 0
+  br i1 %cmp, label %exit, label %for.body
+exit:
+  %rem3 = phi i64 [ %rem2, %for.body ]
+  store i64 %rem3, i64* %loc, align 8, !tbaa !0
+  ret i32 0
+}
+
 !0 = !{i64 1, i64 100}