diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -2781,7 +2781,8 @@ // If we found some loop invariants, fold them into the recurrence. if (!LIOps.empty()) { // Compute nowrap flags for the addition of the loop-invariant ops and - // the addrec. Temporarily push it as an operand for that purpose. + // the addrec. Temporarily push it as an operand for that purpose. These + // flags are valid in the scope of the addrec only. LIOps.push_back(AddRec); SCEV::NoWrapFlags Flags = ComputeFlags(LIOps); LIOps.pop_back(); @@ -2790,10 +2791,26 @@ LIOps.push_back(AddRec->getStart()); SmallVector<const SCEV *, 4> AddRecOps(AddRec->operands()); - // This follows from the fact that the no-wrap flags on the outer add - // expression are applicable on the 0th iteration, when the add recurrence - // will be equal to its start value. - AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); + + // It is not in general safe to propagate flags valid on an add within + // the addrec scope to one outside it. We must prove that the inner + // scope is guaranteed to execute if the outer one does to be able to + // safely propagate. We know the program is undefined if poison is + // produced on the inner scoped addrec. We also know that *for this use* + // the outer scoped add can't overflow (because of the flags we just + // computed for the inner scoped add) without the program being undefined. + // Proving that entry to the outer scope necessitates entry to the inner + // scope, thus proves the program undefined if the flags would be violated + // in the outer scope. + const bool CanPropagateFlags = llvm::any_of(LIOps, [&](const SCEV *S) { + auto *ReachI = &*AddRecLoop->getHeader()->begin(); + if (auto *DefI = getDefinedScopeRoot(S)) + if (isGuaranteedToTransferExecutionTo(DefI, ReachI)) + return true; + return false; + }); + auto AddFlags = CanPropagateFlags ? 
Flags : SCEV::FlagAnyWrap; + AddRecOps[0] = getAddExpr(LIOps, AddFlags, Depth + 1); // Build the new addrec. Propagate the NUW and NSW flags if both the // outer add and the inner addrec are guaranteed to have no overflow. @@ -6572,7 +6589,13 @@ const Instruction *ScalarEvolution::getDefinedScopeRoot(const SCEV *S) { if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(S)) return &*AddRec->getLoop()->getHeader()->begin(); - // TODO: add SCEVConstant and SCEVUnknown caxes here + if (isa<SCEVConstant>(S)) + return &*F.getEntryBlock().begin(); + if (auto *U = dyn_cast<SCEVUnknown>(S)) { + if (auto *I = dyn_cast<Instruction>(U->getValue())) + return I; + return &*F.getEntryBlock().begin(); + } return nullptr; } @@ -6590,6 +6613,15 @@ ::isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), B->getIterator())) return true; + + auto *BLoop = LI.getLoopFor(B->getParent()); + if (BLoop && BLoop->getHeader() == B->getParent() && + BLoop->getLoopPreheader() == A->getParent() && + ::isGuaranteedToTransferExecutionToSuccessor(A->getIterator(), + A->getParent()->end()) && + ::isGuaranteedToTransferExecutionToSuccessor(B->getParent()->begin(), + B->getIterator())) + return true; return false; } @@ -6625,8 +6657,14 @@ // TODO: We can do better here in some cases. if (!isSCEVable(Op->getType())) return false; - if (auto *DefI = getDefinedScopeRoot(getSCEV(Op))) - if (isGuaranteedToTransferExecutionTo(DefI, I)) + // TODO: the following two lines should be: + // if (auto *DefI = getDefinedScopeRoot(getSCEV(Op))) + // if (isGuaranteedToTransferExecutionTo(DefI, I)) + // We use the following instead for the purposes of separating a bugfix + // change from an optimization change. Once pr51817 is fully addressed, + // we should unlock this power. 
+ if (auto *AddRecS = dyn_cast<SCEVAddRecExpr>(getSCEV(Op))) + if (isGuaranteedToExecuteForEveryIteration(I, AddRecS->getLoop())) return true; } return false; diff --git a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll --- a/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll +++ b/llvm/test/Analysis/ScalarEvolution/flags-from-poison.ll @@ -1328,7 +1328,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %sub ; CHECK-NEXT: --> {((-1 * %sub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %sub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))),+,1}<%loop> U: [-4294967295,8589934591) S: [-4294967295,8589934591) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %sub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %sub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 @@ -1369,7 +1369,7 @@ ; CHECK-NEXT: %index32 = sub nsw i32 %i, %halfsub ; CHECK-NEXT: --> {((-1 * %halfsub) + %start),+,1}<%loop> U: full-set S: full-set Exits: (-1 + (-1 * %halfsub) + %numIterations) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %index64 = sext 
i32 %index32 to i64 -; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {((sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))),+,1}<%loop> U: [-3221225471,7516192767) S: [-3221225471,7516192767) Exits: ((zext i32 (-1 + (-1 * %start) + %numIterations) to i64) + (sext i32 %start to i64) + (-1 * (sext i32 %halfsub to i64))) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %ptr = getelementptr inbounds float, float* %input, i64 %index64 ; CHECK-NEXT: --> {((4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input),+,4}<%loop> U: full-set S: full-set Exits: ((4 * (zext i32 (-1 + (-1 * %start) + %numIterations) to i64)) + (4 * (sext i32 %start to i64)) + (-4 * (sext i32 %halfsub to i64)) + %input) LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %nexti = add nsw i32 %i, 1 diff --git a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll --- a/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll +++ b/llvm/test/Analysis/ScalarEvolution/incorrect-exit-count.ll @@ -21,7 +21,7 @@ ; CHECK-NEXT: %idxprom20 = zext i32 %storemerge1921 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%for.cond6> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%for.cond6> U: [0,-1) S: 
[-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %for.cond6: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i = load i16, i16* %arrayidx7, align 2 ; CHECK-NEXT: --> %i U: full-set S: full-set Exits: <> LoopDispositions: { %for.cond6: Variant, %outer.loop: Variant } ; CHECK-NEXT: %storemerge1822.lcssa.ph = phi i32 [ 0, %for.cond6 ] @@ -45,7 +45,7 @@ ; CHECK-NEXT: %idxprom20.3 = zext i32 %storemerge1921.3 to i64 ; CHECK-NEXT: --> {3,+,4294967295}<%inner.loop> U: [3,4) S: [3,4) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %arrayidx7.3 = getelementptr inbounds [1 x [4 x i16]], [1 x [4 x i16]]* @__const.f.g, i64 0, i64 0, i64 %idxprom20.3 -; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [6,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } +; CHECK-NEXT: --> {(6 + @__const.f.g),+,8589934590}<%inner.loop> U: [0,-1) S: [-9223372036854775808,9223372036854775807) Exits: <> LoopDispositions: { %inner.loop: Computable, %outer.loop: Variant } ; CHECK-NEXT: %i7 = load i16, i16* %arrayidx7.3, align 2 ; CHECK-NEXT: --> %i7 U: full-set S: full-set Exits: <> LoopDispositions: { %inner.loop: Variant, %outer.loop: Variant } ; CHECK-NEXT: %i8 = load volatile i32, i32* @b, align 4 diff --git a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll --- a/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll +++ b/llvm/test/Analysis/ScalarEvolution/no-wrap-add-exprs.ll @@ -297,11 +297,11 @@ ; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, %b -; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(%a + 
%b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %trap = udiv i32 %a, %iv.next -; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %c = add i32 %a, %b -; CHECK-NEXT: --> (%a + %b) U: full-set S: full-set +; CHECK-NEXT: --> (%a + %b) U: full-set S: full-set ; CHECK-NEXT: Determining loop execution counts for: @test2_a ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. @@ -335,9 +335,9 @@ ; CHECK-NEXT: %iv = phi i32 [ %a, %entry ], [ %iv.next, %loop ] ; CHECK-NEXT: --> {%a,+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %iv.next = add nuw nsw i32 %iv, %b -; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> {(%a + %b),+,%b}<%loop> U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: %trap = udiv i32 %a, %iv.next -; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } +; CHECK-NEXT: --> (%a /u {(%a + %b),+,%b}<%loop>) U: full-set S: full-set Exits: <> LoopDispositions: { %loop: Computable } ; CHECK-NEXT: Determining loop execution counts for: @test2_b ; CHECK-NEXT: Loop %loop: Unpredictable backedge-taken count. ; CHECK-NEXT: Loop %loop: Unpredictable max backedge-taken count. 
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset-assume.ll @@ -30,13 +30,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %15 = 
getelementptr inbounds double, double* %d, i64 %14 diff --git a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll --- a/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll +++ b/llvm/test/Analysis/ScalarEvolution/nsw-offset.ll @@ -27,13 +27,13 @@ ; CHECK-NEXT: %8 = sext i32 %7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %9 = getelementptr inbounds double, double* %q, i64 %8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t7 = add nsw i32 %i.01, 1 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((-1 + (2 * (%no /u 2))) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t8 = sext i32 %t7 to i64 ; CHECK-NEXT: --> {1,+,2}<%bb> U: [1,2147483646) S: [1,2147483646) Exits: (1 + (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2))) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %t9 = getelementptr inbounds double, double* %q, i64 %t8 -; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: [8,0) S: [8,0) Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } +; CHECK-NEXT: --> {(8 + %q),+,16}<%bb> U: full-set S: full-set Exits: (8 + (16 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) + %q) LoopDispositions: { %bb: Computable } ; CHECK-NEXT: %14 = sext i32 %i.01 to i64 ; CHECK-NEXT: --> {0,+,2}<%bb> U: [0,2147483645) S: [0,2147483645) Exits: (2 * ((1 + (zext i32 (-2 + (2 * (%no /u 2))) to i64)) /u 2)) LoopDispositions: { %bb: Computable } ; 
CHECK-NEXT: %15 = getelementptr inbounds double, double* %d, i64 %14 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,9 +6,6 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: std 19, -104(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, -96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, -88(1) # 8-byte Folded Spill ; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill ; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill ; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill @@ -26,90 +23,84 @@ ; CHECK-NEXT: mulld 12, 8, 5 ; CHECK-NEXT: addi 29, 3, 16 ; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: mr 25, 12 +; CHECK-NEXT: sldi 11, 10, 3 ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 -; CHECK-NEXT: sldi 11, 10, 3 -; CHECK-NEXT: li 27, 0 -; CHECK-NEXT: mr 26, 30 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: add 5, 5, 9 -; CHECK-NEXT: add 25, 25, 0 -; CHECK-NEXT: add 26, 26, 0 +; CHECK-NEXT: add 12, 12, 0 +; CHECK-NEXT: add 30, 30, 0 ; CHECK-NEXT: add 28, 28, 0 ; CHECK-NEXT: add 8, 8, 0 -; CHECK-NEXT: addi 27, 27, 1 ; CHECK-NEXT: cmpd 5, 7 ; CHECK-NEXT: bge 0, .LBB0_6 ; CHECK-NEXT: .LBB0_3: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_5 Depth 2 -; CHECK-NEXT: sub 24, 5, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: sub 27, 5, 10 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: bge 0, .LBB0_2 ; CHECK-NEXT: # %bb.4: -; CHECK-NEXT: maddld 21, 0, 27, 30 -; CHECK-NEXT: maddld 20, 0, 27, 12 -; CHECK-NEXT: add 23, 6, 28 -; CHECK-NEXT: add 22, 6, 8 -; CHECK-NEXT: add 20, 6, 20 -; CHECK-NEXT: add 19, 6, 21 +; CHECK-NEXT: add 23, 6, 12 +; CHECK-NEXT: add 22, 6, 30 +; CHECK-NEXT: add 26, 6, 28 +; CHECK-NEXT: add 25, 6, 8 +; CHECK-NEXT: sldi 24, 6, 3 +; CHECK-NEXT: sldi 26, 26, 3 +; CHECK-NEXT: 
sldi 25, 25, 3 ; CHECK-NEXT: sldi 23, 23, 3 ; CHECK-NEXT: sldi 22, 22, 3 -; CHECK-NEXT: sldi 21, 6, 3 -; CHECK-NEXT: add 23, 29, 23 -; CHECK-NEXT: add 22, 29, 22 -; CHECK-NEXT: sldi 20, 20, 3 -; CHECK-NEXT: sldi 19, 19, 3 -; CHECK-NEXT: add 21, 4, 21 -; CHECK-NEXT: add 20, 3, 20 -; CHECK-NEXT: add 19, 3, 19 +; CHECK-NEXT: add 24, 4, 24 +; CHECK-NEXT: add 26, 29, 26 +; CHECK-NEXT: add 25, 29, 25 +; CHECK-NEXT: add 23, 3, 23 +; CHECK-NEXT: add 22, 3, 22 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_5: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 -; CHECK-NEXT: lfd 0, 0(21) -; CHECK-NEXT: lfd 1, 0(20) +; CHECK-NEXT: lfd 0, 0(24) +; CHECK-NEXT: lfd 1, 0(23) ; CHECK-NEXT: add 6, 6, 10 -; CHECK-NEXT: cmpd 6, 24 +; CHECK-NEXT: cmpd 6, 27 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(20) +; CHECK-NEXT: lfd 1, 8(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(20) +; CHECK-NEXT: lfd 1, 16(23) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(20) -; CHECK-NEXT: add 20, 20, 11 +; CHECK-NEXT: lfd 1, 24(23) +; CHECK-NEXT: add 23, 23, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(19) +; CHECK-NEXT: lfd 1, 0(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(19) +; CHECK-NEXT: lfd 1, 8(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 16(19) +; CHECK-NEXT: lfd 1, 16(22) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 24(19) -; CHECK-NEXT: add 19, 19, 11 +; CHECK-NEXT: lfd 1, 24(22) +; CHECK-NEXT: add 22, 22, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(22) +; CHECK-NEXT: lfd 1, -16(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(22) +; CHECK-NEXT: lfd 1, -8(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(22) +; CHECK-NEXT: lfd 1, 0(25) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(22) -; CHECK-NEXT: add 22, 22, 11 +; CHECK-NEXT: lfd 1, 8(25) +; CHECK-NEXT: add 25, 25, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -16(23) +; CHECK-NEXT: lfd 1, 
-16(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, -8(23) +; CHECK-NEXT: lfd 1, -8(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 0(23) +; CHECK-NEXT: lfd 1, 0(26) ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: lfd 1, 8(23) -; CHECK-NEXT: add 23, 23, 11 +; CHECK-NEXT: lfd 1, 8(26) +; CHECK-NEXT: add 26, 26, 11 ; CHECK-NEXT: xsadddp 0, 0, 1 -; CHECK-NEXT: stfd 0, 0(21) -; CHECK-NEXT: add 21, 21, 11 +; CHECK-NEXT: stfd 0, 0(24) +; CHECK-NEXT: add 24, 24, 11 ; CHECK-NEXT: blt 0, .LBB0_5 ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_6: @@ -122,9 +113,6 @@ ; CHECK-NEXT: ld 24, -64(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 23, -72(1) # 8-byte Folded Reload ; CHECK-NEXT: ld 22, -80(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, -88(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, -96(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, -104(1) # 8-byte Folded Reload ; CHECK-NEXT: blr %9 = icmp slt i64 %2, %4 br i1 %9, label %10, label %97 diff --git a/llvm/test/Transforms/LoopIdiom/basic.ll b/llvm/test/Transforms/LoopIdiom/basic.ll --- a/llvm/test/Transforms/LoopIdiom/basic.ll +++ b/llvm/test/Transforms/LoopIdiom/basic.ll @@ -809,28 +809,26 @@ ; CHECK-NEXT: [[TOBOOL_9:%.*]] = icmp eq i32 [[C]], 0 ; CHECK-NEXT: br i1 [[TOBOOL_9]], label [[WHILE_END:%.*]], label [[WHILE_BODY_PREHEADER:%.*]] ; CHECK: while.body.preheader: -; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = shl nsw i64 [[TMP1]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], -4 -; CHECK-NEXT: [[TMP4:%.*]] = add nsw i32 [[C]], -1 -; CHECK-NEXT: [[TMP5:%.*]] = zext i32 [[TMP4]] to i64 -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw nsw i64 [[TMP5]], 2 -; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP3]], [[TMP6]] -; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP7]] -; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[TMP1]], -1 -; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP8]], [[TMP5]] -; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP9]] +; 
CHECK-NEXT: [[TMP1:%.*]] = add i32 [[C]], -1 +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP3:%.*]] = shl nsw i64 [[TMP2]], 2 +; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[TMP1]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = shl nuw nsw i64 [[TMP4]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP2]], [[TMP4]] +; CHECK-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]] ; CHECK-NEXT: [[SCEVGEP12:%.*]] = bitcast i32* [[SCEVGEP1]] to i8* -; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[C]] to i64 -; CHECK-NEXT: [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2 -; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP11]], i1 false) +; CHECK-NEXT: [[TMP8:%.*]] = zext i32 [[C]] to i64 +; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[SCEVGEP]], i8* align 4 [[SCEVGEP12]], i64 [[TMP9]], i1 false) ; CHECK-NEXT: br label [[WHILE_BODY:%.*]] ; CHECK: while.body: ; CHECK-NEXT: [[DEC10_IN:%.*]] = phi i32 [ [[DEC10:%.*]], [[WHILE_BODY]] ], [ [[C]], [[WHILE_BODY_PREHEADER]] ] ; CHECK-NEXT: [[DEC10]] = add nsw i32 [[DEC10_IN]], -1 ; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[DEC10]] to i64 ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IDXPROM]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 ; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 [[IDXPROM]] ; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[DEC10]], 0 ; CHECK-NEXT: br i1 [[TOBOOL]], label [[WHILE_END_LOOPEXIT:%.*]], label [[WHILE_BODY]]