Index: include/llvm/Analysis/ScalarEvolution.h
===================================================================
--- include/llvm/Analysis/ScalarEvolution.h
+++ include/llvm/Analysis/ScalarEvolution.h
@@ -1538,6 +1538,10 @@
   /// the header of loop L.
   bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L);
 
+  /// Determine if the SCEV depends on a SCEVUnknown which is not invariant
+  /// w.r.t. the loop.
+  bool dependsOnVariantSCEVUnknown(const SCEV *S, const Loop *L);
+
   /// Return true if the given SCEV changes value in a known way in the
   /// specified loop.  This property being true implies that the value is
   /// variant in the loop AND that we can emit an expression to compute the
Index: lib/Analysis/ScalarEvolution.cpp
===================================================================
--- lib/Analysis/ScalarEvolution.cpp
+++ lib/Analysis/ScalarEvolution.cpp
@@ -2234,6 +2234,31 @@
   return !FSU.Found;
 }
 
+bool
+ScalarEvolution::dependsOnVariantSCEVUnknown(const SCEV *S, const Loop *L) {
+  struct FindSCEVUnknown {
+    const Loop *L;
+    ScalarEvolution *SE;
+    bool Found = false;
+
+    FindSCEVUnknown(const Loop *L, ScalarEvolution *SE)
+      : L(L), SE(SE) {}
+
+    bool follow(const SCEV *S) {
+      if (isa<SCEVUnknown>(S) && !SE->isLoopInvariant(S, L))
+        Found = true;
+      return true;
+    }
+
+    bool isDone() { return Found; }
+  };
+
+  FindSCEVUnknown FSU = FindSCEVUnknown(L, this);
+  SCEVTraversal<FindSCEVUnknown> ST(FSU);
+  ST.visitAll(S);
+  return FSU.Found;
+}
+
 /// Get a canonical add expression, or something simpler if possible.
 const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
                                         SCEV::NoWrapFlags Flags,
Index: lib/Transforms/Scalar/LoopStrengthReduce.cpp
===================================================================
--- lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1001,14 +1001,16 @@
   void RateRegister(const SCEV *Reg,
                     SmallPtrSetImpl<const SCEV *> &Regs,
                     const Loop *L,
-                    ScalarEvolution &SE, DominatorTree &DT);
+                    ScalarEvolution &SE, DominatorTree &DT,
+                    const LSRUse &LU);
   void RatePrimaryRegister(const SCEV *Reg,
                            SmallPtrSetImpl<const SCEV *> &Regs,
                            const Loop *L,
                            ScalarEvolution &SE, DominatorTree &DT,
-                           SmallPtrSetImpl<const SCEV *> *LoserRegs);
+                           SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                           const LSRUse &LU);
 };
-  
+
 /// An operand value in an instruction which is to be replaced with some
 /// equivalent, possibly strength-reduced, replacement.
 struct LSRFixup {
@@ -1097,6 +1099,10 @@
   /// the loop, in which case some special-case heuristics may be used.
   bool AllFixupsOutsideLoop;
 
+  /// This records whether all of the fixups using this LSRUse are inside
+  /// the loop, in which case some special-case heuristics may be used.
+  bool AllFixupsInsideLoop;
+
   /// RigidFormula is set to true to guarantee that this use will be associated
   /// with a single formula--the one that initially matched. Some SCEV
   /// expressions cannot be expanded. This allows LSR to consider the registers
@@ -1120,8 +1126,8 @@
 
   LSRUse(KindType K, MemAccessTy AT)
       : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
-        AllFixupsOutsideLoop(true), RigidFormula(false),
-        WidestFixupType(nullptr) {}
+        AllFixupsOutsideLoop(true), AllFixupsInsideLoop(true),
+        RigidFormula(false), WidestFixupType(nullptr) {}
 
   LSRFixup &getNewFixup() {
     Fixups.push_back(LSRFixup());
@@ -1135,7 +1141,7 @@
     if (f.Offset < MinOffset)
       MinOffset = f.Offset;
   }
-  
+
   bool HasFormulaWithSameRegs(const Formula &F) const;
   float getNotSelectedProbability(const SCEV *Reg) const;
   bool InsertFormula(const Formula &F, const Loop &L);
@@ -1152,7 +1158,18 @@
 void Cost::RateRegister(const SCEV *Reg,
                         SmallPtrSetImpl<const SCEV *> &Regs,
                         const Loop *L,
-                        ScalarEvolution &SE, DominatorTree &DT) {
+                        ScalarEvolution &SE, DominatorTree &DT,
+                        const LSRUse &LU) {
+  // If we have a use of loop-variant SCEVUnknown outside the loop, it is likely
+  // that the optimization won't give us any benefits here: we will have to
+  // reuse these values from within the loop and cannot adequately evaluate how
+  // many other instructions from the loop they use. So in fact it is possible
+  // that we will not be able to optimize anything from the loop, but will
+  // create LSR Phis and duplicate some instructions, producing a worse code.
+  if (!LU.AllFixupsInsideLoop && SE.dependsOnVariantSCEVUnknown(Reg, L)) {
+    Lose();
+    return;
+  }
   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
     // If this is an addrec for another loop, it should be an invariant
     // with respect to L since L is the innermost loop (at least
@@ -1179,7 +1196,7 @@
     // TODO: The non-affine case isn't precisely modeled here.
     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
       if (!Regs.count(AR->getOperand(1))) {
-        RateRegister(AR->getOperand(1), Regs, L, SE, DT);
+        RateRegister(AR->getOperand(1), Regs, L, SE, DT, LU);
         if (isLoser())
           return;
       }
@@ -1207,13 +1224,14 @@
                                SmallPtrSetImpl<const SCEV *> &Regs,
                                const Loop *L,
                                ScalarEvolution &SE, DominatorTree &DT,
-                               SmallPtrSetImpl<const SCEV *> *LoserRegs) {
+                               SmallPtrSetImpl<const SCEV *> *LoserRegs,
+                               const LSRUse &LU) {
   if (LoserRegs && LoserRegs->count(Reg)) {
     Lose();
     return;
   }
   if (Regs.insert(Reg).second) {
-    RateRegister(Reg, Regs, L, SE, DT);
+    RateRegister(Reg, Regs, L, SE, DT, LU);
     if (LoserRegs && isLoser())
       LoserRegs->insert(Reg);
   }
@@ -1237,7 +1255,7 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs, LU);
     if (isLoser())
       return;
   }
@@ -1246,7 +1264,7 @@
       Lose();
       return;
     }
-    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs);
+    RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs, LU);
     if (isLoser())
       return;
   }
@@ -1512,6 +1530,9 @@
   if (AllFixupsOutsideLoop)
     OS << ", all-fixups-outside-loop";
 
+  if (AllFixupsInsideLoop)
+  OS << ", all-fixups-inside-loop";
+
   if (WidestFixupType)
     OS << ", widest fixup type: " << *WidestFixupType;
 }
@@ -3132,7 +3153,7 @@
 
     const SCEV *S = IU.getExpr(U);
     PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
-    
+
     // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
     // (N - i == 0), and this allows (N - i) to be the expression that we work
     // with rather than just N or i, so we can consider the register
@@ -3181,7 +3202,9 @@
     LF.OperandValToReplace = U.getOperandValToReplace();
     LF.PostIncLoops = TmpPostIncLoops;
     LF.Offset = Offset;
-    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+    bool UseOutside = LF.isUseFullyOutsideLoop(L);
+    LU.AllFixupsOutsideLoop &= UseOutside;
+    LU.AllFixupsInsideLoop &= !UseOutside;
 
     if (!LU.WidestFixupType ||
         SE.getTypeSizeInBits(LU.WidestFixupType) <
@@ -3330,7 +3353,9 @@
         LF.UserInst = const_cast<Instruction *>(UserInst);
         LF.OperandValToReplace = U;
         LF.Offset = Offset;
-        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+        bool UseOutside = LF.isUseFullyOutsideLoop(L);
+        LU.AllFixupsOutsideLoop &= UseOutside;
+        LU.AllFixupsInsideLoop &= !UseOutside;
         if (!LU.WidestFixupType ||
             SE.getTypeSizeInBits(LU.WidestFixupType) <
             SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -4249,6 +4274,7 @@
       DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');
 
       LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
+      LUThatHas->AllFixupsInsideLoop &= LU.AllFixupsInsideLoop;
 
       // Transfer the fixups of LU to LUThatHas.
       for (LSRFixup &Fixup : LU.Fixups) {
@@ -4256,7 +4282,7 @@
         LUThatHas->pushFixup(Fixup);
         DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
       }
-      
+
       // Delete formulae from the new use which are no longer legal.
       bool Any = false;
       for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
Index: test/CodeGen/ARM/arm-and-tst-peephole.ll
===================================================================
--- test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -28,8 +28,10 @@
 ; ARM-NEXT: beq
 
 ; THUMB:      movs r[[R0:[0-9]+]], #3
-; THUMB-NEXT: ands r[[R0]], r
-; THUMB-NEXT: cmp r[[R0]], #0
+; THUMB-NEXT: mvns r[[R1:[0-9]+]], r[[R0]]
+; THUMB-NEXT: ldr r[[R1]],
+; THUMB-NEXT: ands r[[R1]], r[[R0]]
+; THUMB-NEXT: cmp r[[R1]], #0
 ; THUMB-NEXT: beq
 
 ; T2:      ands {{r[0-9]+}}, {{r[0-9]+}}, #3
Index: test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll
===================================================================
--- test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll
+++ test/Transforms/LoopStrengthReduce/X86/reject-scev-unknown.ll
@@ -0,0 +1,58 @@
+; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i32:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure that LSR does not try to get something out of this loop,
+; because the sinking formula depends on SCEVUnknown Phi node %tmp0.
+define i32 @test_01() {
+; CHECK-LABEL: @test_01
+; CHECK:       exit:
+; CHECK-NEXT:  ret i32 %tmp7
+; CHECK-NOT:   lsr-iv
+entry:
+  br label %loop
+
+exit:                                             ; preds = %loop
+  ret i32 %tmp7
+
+loop:                                             ; preds = %loop, %entry
+  %tmp0 = phi i32 [ 0, %entry ], [ %tmp7, %loop ]
+  %indvars.iv = phi i32 [ 4, %entry ], [ %indvars.iv.next, %loop ]
+  %tmp3 = add i32 %tmp0, -1
+  %tmp4 = mul i32 %tmp3, %tmp3
+  %tmp5 = sub i32 %tmp0, %indvars.iv
+  %tmp6 = mul i32 %tmp4, %indvars.iv
+  %tmp7 = add i32 %tmp5, %tmp6
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 80
+  br i1 %exitcond, label %exit, label %loop
+}
+
+; Make sure that LSR does not try to get something out of this loop,
+; because the sinking formula depends on SCEVUnknown Load %lv.
+define i32 @test_02(i32* %p) {
+; CHECK-LABEL: @test_02
+; CHECK:       exit:
+; CHECK-NEXT:  ret i32 %tmp7
+; CHECK-NOT:   lsr-iv
+entry:
+  br label %loop
+
+exit:                                             ; preds = %loop
+  ret i32 %tmp7
+
+loop:                                             ; preds = %loop, %entry
+  %tmp0 = phi i32 [ 0, %entry ], [ %tmp7, %loop ]
+  %indvars.iv = phi i32 [ 4, %entry ], [ %indvars.iv.next, %loop ]
+  %lp = getelementptr inbounds i32, i32* %p, i32 %tmp0
+  %lv = load i32, i32* %lp
+  %tmp3 = add i32 %lv, -1
+  %tmp4 = mul i32 %tmp3, %tmp3
+  %tmp5 = sub i32 %lv, %indvars.iv
+  %tmp6 = mul i32 %tmp4, %indvars.iv
+  %tmp7 = add i32 %tmp5, %tmp6
+  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+  %exitcond = icmp eq i32 %indvars.iv.next, 80
+  br i1 %exitcond, label %exit, label %loop
+}