diff --git a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
--- a/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
+++ b/llvm/include/llvm/Analysis/LoopUnrollAnalyzer.h
@@ -83,6 +83,10 @@
 
   bool simplifyInstWithSCEV(Instruction *I);
 
+  /// If \p V is a pointer computed outside the loop, try to record it as a
+  /// base plus constant offset so it can be compared with in-loop addresses.
+  void simplifyNonLoopAddress(Value *V);
+
   bool visitInstruction(Instruction &I) { return simplifyInstWithSCEV(&I); }
   bool visitBinaryOperator(BinaryOperator &I);
   bool visitLoad(LoadInst &I);
diff --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
--- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -60,6 +60,32 @@
   return false;
 }
 
+void UnrolledInstAnalyzer::simplifyNonLoopAddress(Value *V) {
+  // Candidates are pointer-typed, SCEVable instructions defined outside of
+  // the current loop that have not been recorded yet.
+  auto *AddrInst = dyn_cast<Instruction>(V);
+  if (!AddrInst)
+    return;
+  Type *AddrTy = AddrInst->getType();
+  if (!AddrTy->isPointerTy() || !SE.isSCEVable(AddrTy))
+    return;
+  if (L->contains(AddrInst) || SimplifiedAddresses.count(AddrInst))
+    return;
+
+  // Only record the address if it is a constant offset from its base.
+  const SCEV *AddrSCEV = SE.getSCEV(AddrInst);
+  const auto *BasePtr = dyn_cast<SCEVUnknown>(SE.getPointerBase(AddrSCEV));
+  const SCEVConstant *ConstDelta = nullptr;
+  if (BasePtr)
+    ConstDelta = dyn_cast<SCEVConstant>(SE.getMinusSCEV(AddrSCEV, BasePtr));
+  if (!ConstDelta)
+    return;
+
+  SimplifiedAddress &Entry = SimplifiedAddresses[V];
+  Entry.Base = BasePtr->getValue();
+  Entry.Offset = ConstDelta->getValue();
+}
+
 /// Try to simplify binary operator I.
 ///
 /// TODO: Probably it's worth to hoist the code for estimating the
@@ -175,6 +201,8 @@
       RHS = SimpleRHS;
 
   if (!isa<Constant>(LHS) && !isa<Constant>(RHS)) {
+    simplifyNonLoopAddress(LHS);
+    simplifyNonLoopAddress(RHS);
     auto SimplifiedLHS = SimplifiedAddresses.find(LHS);
     if (SimplifiedLHS != SimplifiedAddresses.end()) {
       auto SimplifiedRHS = SimplifiedAddresses.find(RHS);
diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -423,9 +423,10 @@
 
         // First accumulate the cost of this instruction.
         if (!Cost.IsFree) {
-          UnrolledCost += TTI.getUserCost(I);
+          unsigned UserCost = TTI.getUserCost(I);
+          UnrolledCost += UserCost;
           LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
-                            << Iteration << "): ");
+                            << Iteration << "): " << UserCost << " ");
           LLVM_DEBUG(I->dump());
         }
 
diff --git a/llvm/test/Transforms/LoopUnroll/unrolled-inst-analyzer-pointers.ll b/llvm/test/Transforms/LoopUnroll/unrolled-inst-analyzer-pointers.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/unrolled-inst-analyzer-pointers.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-unroll -debug < %s 2>&1 | FileCheck %s
+
+; REQUIRES: asserts
+
+; Check that addresses computed outside of the loop are simplified, so that
+; comparisons against them fold: %cmp1.i.i must be free when unrolling.
+
+define i32 @test_2_iters(i32 %a, i32 %b, i32 %c, i32 %d) optsize {
+; CHECK-LABEL: Loop Unroll: F[test_2_iters] Loop %loop
+; CHECK-NEXT:  Loop Size = 7
+; CHECK-NEXT:  Starting LoopUnroll profitability analysis...
+; CHECK-NEXT:   Analyzing iteration 0
+; CHECK-NEXT:   Analyzing iteration 1
+; CHECK-NEXT:  Adding cost of instruction (iteration 1): 1   %spec.select.i.i = select i1 %cmp.i.i.i, i32* %incdec.ptr.i.i8, i32* %spec.select.i.i7
+; CHECK-NEXT:  Adding cost of instruction (iteration 1): 1   %cmp.i.i.i = icmp slt i32 %.pre, %.pre3
+; CHECK-NEXT:  Adding cost of instruction (iteration 1): 1   %.pre3 = load i32, i32* %incdec.ptr.i.i8, align 4
+; CHECK-NEXT:  Adding cost of instruction (iteration 1): 1   %.pre = load i32, i32* %spec.select.i.i7, align 4
+; CHECK-NEXT:  Adding cost of instruction (iteration 0): 1   %incdec.ptr.i.i = getelementptr inbounds i32, i32* %incdec.ptr.i.i8, i64 1
+; CHECK-NEXT:  Adding cost of instruction (iteration 0): 1   %spec.select.i.i = select i1 %cmp.i.i.i, i32* %incdec.ptr.i.i8, i32* %spec.select.i.i7
+; CHECK-NEXT:  Adding cost of instruction (iteration 0): 1   %cmp.i.i.i = icmp slt i32 %.pre, %.pre3
+; CHECK-NEXT:  Adding cost of instruction (iteration 0): 1   %.pre3 = load i32, i32* %incdec.ptr.i.i8, align 4
+; CHECK-NEXT:  Adding cost of instruction (iteration 0): 1   %.pre = load i32, i32* %spec.select.i.i7, align 4
+; CHECK-NEXT:  Analysis finished:
+; CHECK-NEXT:    UnrolledCost: 9, RolledDynamicCost: 14
+; %ref.tmp[0..3] = {%a, %b, %c, %d}. The loop visits elements 2 and 3, keeping
+; a pointer to the largest one; it exits once the cursor reaches %add.ptr.i.i.
+entry:
+  %ref.tmp = alloca [4 x i32], align 4
+  %0 = bitcast [4 x i32]* %ref.tmp to i8*
+  %arrayinit.begin = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 0
+  store i32 %a, i32* %arrayinit.begin, align 4
+  %arrayinit.element = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 1
+  store i32 %b, i32* %arrayinit.element, align 4
+  %arrayinit.element1 = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 2
+  store i32 %c, i32* %arrayinit.element1, align 4
+  %arrayinit.element2 = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 3
+  store i32 %d, i32* %arrayinit.element2, align 4
+  %add.ptr.i.i = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 4
+  %cmp.i.i.i4 = icmp slt i32 %a, %b
+  %spec.select.i.i5 = select i1 %cmp.i.i.i4, i32* %arrayinit.element, i32* %arrayinit.begin
+  %incdec.ptr.i.i6 = getelementptr inbounds [4 x i32], [4 x i32]* %ref.tmp, i64 0, i64 2
+  br label %loop
+
+loop:                           ; preds = %entry, %loop
+  %incdec.ptr.i.i8 = phi i32* [ %incdec.ptr.i.i6, %entry ], [ %incdec.ptr.i.i, %loop ]
+  %spec.select.i.i7 = phi i32* [ %spec.select.i.i5, %entry ], [ %spec.select.i.i, %loop ]
+  %.pre = load i32, i32* %spec.select.i.i7, align 4
+  %.pre3 = load i32, i32* %incdec.ptr.i.i8, align 4
+  %cmp.i.i.i = icmp slt i32 %.pre, %.pre3
+  %spec.select.i.i = select i1 %cmp.i.i.i, i32* %incdec.ptr.i.i8, i32* %spec.select.i.i7
+  %incdec.ptr.i.i = getelementptr inbounds i32, i32* %incdec.ptr.i.i8, i64 1
+  %cmp1.i.i = icmp eq i32* %incdec.ptr.i.i, %add.ptr.i.i
+  br i1 %cmp1.i.i, label %exit, label %loop
+
+exit:                           ; preds = %loop
+  %1 = load i32, i32* %spec.select.i.i, align 4
+  ret i32 %1
+}