diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -133,7 +133,8 @@
 //
 Pass *createLICMPass();
 Pass *createLICMPass(unsigned LicmMssaOptCap,
-                     unsigned LicmMssaNoAccForPromotionCap);
+                     unsigned LicmMssaNoAccForPromotionCap,
+                     bool AllowSpeculation);
 
 //===----------------------------------------------------------------------===//
 //
diff --git a/llvm/include/llvm/Transforms/Scalar/LICM.h b/llvm/include/llvm/Transforms/Scalar/LICM.h
--- a/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -46,14 +46,18 @@
 class LICMPass : public PassInfoMixin<LICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LICMPass()
       : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+           bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
@@ -62,14 +66,18 @@
 class LNICMPass : public PassInfoMixin<LNICMPass> {
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 
 public:
   LNICMPass()
      : LicmMssaOptCap(SetLicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap) {}
-  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap)
+        LicmMssaNoAccForPromotionCap(SetLicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(true) {}
+  LNICMPass(unsigned LicmMssaOptCap, unsigned LicmMssaNoAccForPromotionCap,
+            bool LicmAllowSpeculation)
      : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
   PreservedAnalyses run(LoopNest &L, LoopAnalysisManager &AM,
                         LoopStandardAnalysisResults &AR, LPMUpdater &U);
 };
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -171,10 +171,13 @@
 /// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
 /// instructions of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
                  BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
                  MemorySSAUpdater *, ScalarEvolution *, ICFLoopSafetyInfo *,
-                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool);
+                 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *, bool,
+                 bool AllowSpeculation);
 
 /// This function deletes dead loops. The caller of this function needs to
 /// guarantee that the loop is infact dead.
@@ -204,12 +207,14 @@
 /// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
 /// of the loop and loop safety information as arguments.
 /// Diagnostics is emitted via \p ORE. It returns changed status.
+/// \p AllowSpeculation is whether values should be hoisted even if they are not
+/// guaranteed to execute in the loop, but are safe to speculatively execute.
 bool promoteLoopAccessesToScalars(
     const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
     SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
     PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
     Loop *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
-    OptimizationRemarkEmitter *);
+    OptimizationRemarkEmitter *, bool AllowSpeculation);
 
 /// Does a BFS from a given node to all of its children inside a given loop.
 /// The returned vector of nodes includes the starting point.
diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp
--- a/llvm/lib/Passes/PassBuilderPipelines.cpp
+++ b/llvm/lib/Passes/PassBuilderPipelines.cpp
@@ -293,14 +293,19 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
 
   LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true,
                               isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(SimpleLoopUnswitchPass());
   if (EnableLoopFlatten)
     LPM1.addPass(LoopFlattenPass());
@@ -470,15 +475,20 @@
   LPM1.addPass(LoopSimplifyCFGPass());
 
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/false));
 
   // Disable header duplication in loop rotation at -Oz.
   LPM1.addPass(
       LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase)));
   // TODO: Investigate promotion cap for O1.
-  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   LPM1.addPass(
       SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 &&
                              EnableO3NonTrivialUnswitching));
@@ -575,7 +585,8 @@
   FPM.addPass(DSEPass());
   FPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
       /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   FPM.addPass(CoroElidePass());
@@ -1019,7 +1030,8 @@
     ExtraPasses.addPass(CorrelatedValuePropagationPass());
     ExtraPasses.addPass(InstCombinePass());
     LoopPassManager LPM;
-    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+    LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
     LPM.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level ==
                                        OptimizationLevel::O3));
     ExtraPasses.addPass(
@@ -1087,7 +1099,8 @@
   FPM.addPass(
       RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
   FPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
      /*UseMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 }
@@ -1627,7 +1640,8 @@
   FunctionPassManager MainFPM;
 
   MainFPM.addPass(createFunctionToLoopPassAdaptor(
-      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
+               /*AllowSpeculation=*/true),
      /*USeMemorySSA=*/true, /*UseBlockFrequencyInfo=*/true));
 
   if (RunNewGVN)
diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
--- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -458,13 +458,18 @@
     MPM.add(createLoopSimplifyCFGPass());
   }
   // Try to remove as much code from the loop header as possible,
-  // to reduce amount of IR that will have to be duplicated.
+  // to reduce amount of IR that will have to be duplicated. However,
+  // do not perform speculative hoisting the first time as LICM
+  // will destroy metadata that may not need to be destroyed if run
+  // after loop rotation.
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/false));
   // Rotate Loop - disable header duplication at -Oz
   MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1, PrepareForLTO));
   // TODO: Investigate promotion cap for O1.
-  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                         /*AllowSpeculation=*/true));
   if (EnableSimpleLoopUnswitch)
     MPM.add(createSimpleLoopUnswitchLegacyPass());
   else
@@ -529,7 +534,8 @@
   // TODO: Investigate if this is too expensive at O1.
   if (OptLevel > 1) {
     MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
@@ -588,7 +594,8 @@
     PM.add(createEarlyCSEPass());
     PM.add(createCorrelatedValuePropagationPass());
     PM.add(createInstructionCombiningPass());
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
     PM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));
     PM.add(createCFGSimplificationPass(
         SimplifyCFGOptions().convertSwitchRangeToICmp(true)));
@@ -651,7 +658,8 @@
     // unrolled loop is a inner loop, then the prologue will be inside the
     // outer loop. LICM pass can help to promote the runtime check out if the
     // checked value is loop invariant.
-    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                          /*AllowSpeculation=*/true));
   }
 
   PM.add(createWarnMissedTransformationsPass());
@@ -898,7 +906,8 @@
   // later might get benefit of no-alias assumption in clone loop.
   if (UseLoopVersioningLICM) {
     MPM.add(createLoopVersioningLICMPass()); // Do LoopVersioningLICM
-    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+    MPM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           /*AllowSpeculation=*/true));
   }
 
   // We add a fresh GlobalsModRef run at this point. This is particularly
@@ -1133,7 +1142,8 @@
   // Run a few AA driven optimizations here and now, to cleanup the code.
   PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
 
-  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap));
+  PM.add(createLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                        /*AllowSpeculation=*/true));
   PM.add(NewGVN ? createNewGVNPass()
                 : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
   PM.add(createMemCpyOptPass()); // Remove dead memcpys.
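
[Editor's sketch, not part of the patch: the scheduling idea behind the new flag
is easiest to see in isolation. The C++ snippet below shows how a pipeline built
against these headers would bracket loop rotation with the two LICM runs;
buildLoopPasses is a hypothetical helper, and the cap values come from the
existing PipelineTuningOptions fields already used throughout this patch.]

#include "llvm/Passes/PassBuilder.h"
#include "llvm/Transforms/Scalar/LICM.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"

using namespace llvm;

static LoopPassManager buildLoopPasses(const PipelineTuningOptions &PTO) {
  LoopPassManager LPM;
  // Pre-rotation LICM: hoist only instructions guaranteed to execute, so
  // metadata on conditional accesses (e.g. !tbaa) is not dropped needlessly.
  LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                       /*AllowSpeculation=*/false));
  LPM.addPass(LoopRotatePass());
  // Post-rotation LICM: rotation guards the loop body, so instructions that
  // were conditional are now guaranteed to execute, and full speculative
  // hoisting no longer has to strip their metadata.
  LPM.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap,
                       /*AllowSpeculation=*/true));
  return LPM;
}
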
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -149,13 +149,11 @@
                  BlockFrequencyInfo *BFI, const Loop *CurLoop,
                  ICFLoopSafetyInfo *SafetyInfo, MemorySSAUpdater *MSSAU,
                  OptimizationRemarkEmitter *ORE);
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
-                                           const DominatorTree *DT,
-                                           const TargetLibraryInfo *TLI,
-                                           const Loop *CurLoop,
-                                           const LoopSafetyInfo *SafetyInfo,
-                                           OptimizationRemarkEmitter *ORE,
-                                           const Instruction *CtxI = nullptr);
+static bool isSafeToExecuteUnconditionally(
+    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+    bool AllowSpeculation);
 static bool pointerInvalidatedByLoop(MemoryLocation MemLoc,
                                      AliasSetTracker *CurAST, Loop *CurLoop,
                                      AAResults *AA);
@@ -188,21 +186,26 @@
                  OptimizationRemarkEmitter *ORE, bool LoopNestMode = false);
 
   LoopInvariantCodeMotion(unsigned LicmMssaOptCap,
-                          unsigned LicmMssaNoAccForPromotionCap)
+                          unsigned LicmMssaNoAccForPromotionCap,
+                          bool LicmAllowSpeculation)
       : LicmMssaOptCap(LicmMssaOptCap),
-        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap) {}
+        LicmMssaNoAccForPromotionCap(LicmMssaNoAccForPromotionCap),
+        LicmAllowSpeculation(LicmAllowSpeculation) {}
 
 private:
   unsigned LicmMssaOptCap;
   unsigned LicmMssaNoAccForPromotionCap;
+  bool LicmAllowSpeculation;
 };
 
 struct LegacyLICMPass : public LoopPass {
   static char ID; // Pass identification, replacement for typeid
   LegacyLICMPass(
       unsigned LicmMssaOptCap = SetLicmMssaOptCap,
-      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap)
-      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap) {
+      unsigned LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap,
+      bool LicmAllowSpeculation = true)
+      : LoopPass(ID), LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                           LicmAllowSpeculation) {
     initializeLegacyLICMPassPass(*PassRegistry::getPassRegistry());
   }
@@ -265,7 +268,8 @@
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(L.getHeader()->getParent());
 
-  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                               LicmAllowSpeculation);
   if (!LICM.runOnLoop(&L, &AR.AA, &AR.LI, &AR.DT, AR.BFI, &AR.TLI, &AR.TTI,
                       &AR.SE, AR.MSSA, &ORE))
     return PreservedAnalyses::all();
@@ -290,7 +294,8 @@
   // but ORE cannot be preserved (see comment before the pass definition).
   OptimizationRemarkEmitter ORE(LN.getParent());
 
-  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+  LoopInvariantCodeMotion LICM(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                               LicmAllowSpeculation);
 
   Loop &OutermostLoop = LN.getOutermostLoop();
   bool Changed = LICM.runOnLoop(&OutermostLoop, &AR.AA, &AR.LI, &AR.DT, AR.BFI,
@@ -321,8 +326,10 @@
 Pass *llvm::createLICMPass() { return new LegacyLICMPass(); }
 
 Pass *llvm::createLICMPass(unsigned LicmMssaOptCap,
-                           unsigned LicmMssaNoAccForPromotionCap) {
-  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap);
+                           unsigned LicmMssaNoAccForPromotionCap,
+                           bool LicmAllowSpeculation) {
+  return new LegacyLICMPass(LicmMssaOptCap, LicmMssaNoAccForPromotionCap,
+                            LicmAllowSpeculation);
 }
 
 llvm::SinkAndHoistLICMFlags::SinkAndHoistLICMFlags(bool IsSink, Loop *L,
@@ -418,7 +425,8 @@
     Flags.setIsSink(false);
     if (Preheader)
       Changed |= hoistRegion(DT->getNode(L->getHeader()), AA, LI, DT, BFI, TLI, L,
-                             &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode);
+                             &MSSAU, SE, &SafetyInfo, Flags, ORE, LoopNestMode,
+                             LicmAllowSpeculation);
 
   // Now that all loop invariants have been removed from the loop, promote any
   // memory references to scalars that we can.
@@ -460,8 +468,8 @@
         for (const SmallSetVector<Value *, 8> &PointerMustAliases :
              collectPromotionCandidates(MSSA, AA, L)) {
           LocalPromoted |= promoteLoopAccessesToScalars(
-              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC,
-              LI, DT, TLI, L, &MSSAU, &SafetyInfo, ORE);
+              PointerMustAliases, ExitBlocks, InsertPts, MSSAInsertPts, PIC, LI,
+              DT, TLI, L, &MSSAU, &SafetyInfo, ORE, LicmAllowSpeculation);
         }
         Promoted |= LocalPromoted;
       } while (LocalPromoted);
@@ -825,7 +833,8 @@
                        MemorySSAUpdater *MSSAU, ScalarEvolution *SE,
                        ICFLoopSafetyInfo *SafetyInfo,
                        SinkAndHoistLICMFlags &Flags,
-                       OptimizationRemarkEmitter *ORE, bool LoopNestMode) {
+                       OptimizationRemarkEmitter *ORE, bool LoopNestMode,
+                       bool AllowSpeculation) {
   // Verify inputs.
   assert(N != nullptr && AA != nullptr && LI != nullptr && DT != nullptr &&
          CurLoop != nullptr && MSSAU != nullptr && SafetyInfo != nullptr &&
@@ -877,7 +886,7 @@
                                 true, &Flags, ORE) &&
             isSafeToExecuteUnconditionally(
                 I, DT, TLI, CurLoop, SafetyInfo, ORE,
-                CurLoop->getLoopPreheader()->getTerminator())) {
+                CurLoop->getLoopPreheader()->getTerminator(), AllowSpeculation)) {
           hoist(I, DT, CurLoop, CFH.getOrCreateHoistedBlock(BB), SafetyInfo,
                 MSSAU, SE, ORE);
           HoistedInstructions.push_back(&I);
@@ -1774,14 +1783,12 @@
 /// Only sink or hoist an instruction if it is not a trapping instruction,
 /// or if the instruction is known not to trap when moved to the preheader.
 /// or if it is a trapping instruction and is guaranteed to execute.
-static bool isSafeToExecuteUnconditionally(Instruction &Inst,
-                                           const DominatorTree *DT,
-                                           const TargetLibraryInfo *TLI,
-                                           const Loop *CurLoop,
-                                           const LoopSafetyInfo *SafetyInfo,
-                                           OptimizationRemarkEmitter *ORE,
-                                           const Instruction *CtxI) {
-  if (isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
+static bool isSafeToExecuteUnconditionally(
+    Instruction &Inst, const DominatorTree *DT, const TargetLibraryInfo *TLI,
+    const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+    OptimizationRemarkEmitter *ORE, const Instruction *CtxI,
+    bool AllowSpeculation) {
+  if (AllowSpeculation && isSafeToSpeculativelyExecute(&Inst, CtxI, DT, TLI))
     return true;
 
   bool GuaranteedToExecute =
@@ -1949,7 +1956,7 @@
     SmallVectorImpl<MemoryAccess *> &MSSAInsertPts, PredIteratorCache &PIC,
     LoopInfo *LI, DominatorTree *DT, const TargetLibraryInfo *TLI,
     Loop *CurLoop, MemorySSAUpdater *MSSAU, ICFLoopSafetyInfo *SafetyInfo,
-    OptimizationRemarkEmitter *ORE) {
+    OptimizationRemarkEmitter *ORE, bool AllowSpeculation) {
   // Verify inputs.
   assert(LI != nullptr && DT != nullptr && CurLoop != nullptr &&
          SafetyInfo != nullptr &&
@@ -2054,9 +2061,9 @@
         // to execute does as well.  Thus we can increase our guaranteed
         // alignment as well.
         if (!DereferenceableInPH || (InstAlignment > Alignment))
-          if (isSafeToExecuteUnconditionally(*Load, DT, TLI, CurLoop,
-                                             SafetyInfo, ORE,
-                                             Preheader->getTerminator())) {
+          if (isSafeToExecuteUnconditionally(
+                  *Load, DT, TLI, CurLoop, SafetyInfo, ORE,
+                  Preheader->getTerminator(), AllowSpeculation)) {
             DereferenceableInPH = true;
             Alignment = std::max(Alignment, InstAlignment);
           }
diff --git a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
--- a/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
+++ b/llvm/test/Transforms/LoopUnroll/AArch64/runtime-unroll-generic.ll
@@ -6,21 +6,21 @@
 define void @runtime_unroll_generic(i32 %arg_0, i32* %arg_1, i16* %arg_2, i16* %arg_3) {
 ; CHECK-A55-LABEL: @runtime_unroll_generic(
 ; CHECK-A55-NEXT:  entry:
+; CHECK-A55-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
+; CHECK-A55-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]]
+; CHECK-A55:       for.body6.lr.ph:
 ; CHECK-A55-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef
 ; CHECK-A55-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef
 ; CHECK-A55-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef
-; CHECK-A55-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
-; CHECK-A55-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_PREHEADER:%.*]]
-; CHECK-A55:       for.body6.preheader:
 ; CHECK-A55-NEXT:    [[TMP0:%.*]] = add i32 [[ARG_0]], -1
 ; CHECK-A55-NEXT:    [[XTRAITER:%.*]] = and i32 [[ARG_0]], 3
 ; CHECK-A55-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[TMP0]], 3
-; CHECK-A55-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_PREHEADER_NEW:%.*]]
-; CHECK-A55:       for.body6.preheader.new:
+; CHECK-A55-NEXT:    br i1 [[TMP1]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY6_LR_PH_NEW:%.*]]
+; CHECK-A55:       for.body6.lr.ph.new:
 ; CHECK-A55-NEXT:    [[UNROLL_ITER:%.*]] = and i32 [[ARG_0]], -4
 ; CHECK-A55-NEXT:    br label [[FOR_BODY6:%.*]]
 ; CHECK-A55:       for.body6:
-; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_PREHEADER_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
+; CHECK-A55-NEXT:    [[NITER:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH_NEW]] ], [ [[NITER_NEXT_3:%.*]], [[FOR_BODY6]] ]
 ; CHECK-A55-NEXT:    [[TMP2:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
 ; CHECK-A55-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
 ; CHECK-A55-NEXT:    [[TMP3:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
@@ -93,13 +93,15 @@
 ;
 ; CHECK-GENERIC-LABEL: @runtime_unroll_generic(
 ; CHECK-GENERIC-NEXT:  entry:
+; CHECK-GENERIC-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
+; CHECK-GENERIC-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6_LR_PH:%.*]]
+; CHECK-GENERIC:       for.body6.lr.ph:
 ; CHECK-GENERIC-NEXT:    [[ARRAYIDX10:%.*]] = getelementptr inbounds i16, i16* [[ARG_2:%.*]], i64 undef
 ; CHECK-GENERIC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i16, i16* [[ARG_3:%.*]], i64 undef
 ; CHECK-GENERIC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i32, i32* [[ARG_1:%.*]], i64 undef
-; CHECK-GENERIC-NEXT:    [[CMP52_NOT:%.*]] = icmp eq i32 [[ARG_0:%.*]], 0
-; CHECK-GENERIC-NEXT:    br i1 [[CMP52_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY6:%.*]]
+; CHECK-GENERIC-NEXT:    br label [[FOR_BODY6:%.*]]
 ; CHECK-GENERIC:       for.body6:
-; CHECK-GENERIC-NEXT:    [[K_03:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY6]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-GENERIC-NEXT:    [[K_03:%.*]] = phi i32 [ 0, [[FOR_BODY6_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY6]] ]
 ; CHECK-GENERIC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX10]], align 2
 ; CHECK-GENERIC-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
 ; CHECK-GENERIC-NEXT:    [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX14]], align 2
diff --git a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
--- a/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
+++ b/llvm/test/Transforms/PhaseOrdering/AArch64/matrix-extract-insert.ll
@@ -88,10 +88,10 @@
 define void @matrix_extract_insert_loop(i32 %i, [225 x double]* nonnull align 8 dereferenceable(1800) %A, [225 x double]* nonnull align 8 dereferenceable(1800) %B) {
 ; CHECK-LABEL: @matrix_extract_insert_loop(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP212_NOT:%.*]] = icmp eq i32 [[I:%.*]], 0
 ; CHECK-NEXT:    [[TMP0:%.*]] = bitcast [225 x double]* [[A:%.*]] to <225 x double>*
-; CHECK-NEXT:    [[CONV6:%.*]] = zext i32 [[I:%.*]] to i64
+; CHECK-NEXT:    [[CONV6:%.*]] = zext i32 [[I]] to i64
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [225 x double]* [[B:%.*]] to <225 x double>*
-; CHECK-NEXT:    [[CMP212_NOT:%.*]] = icmp eq i32 [[I]], 0
 ; CHECK-NEXT:    br i1 [[CMP212_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US:%.*]]
 ; CHECK:       for.cond1.preheader.us:
 ; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i32 [[I]], 225
@@ -145,8 +145,8 @@
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_2:%.*]]
 ; CHECK:       for.body4.us.2:
 ; CHECK-NEXT:    [[K_013_US_2:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_1]] ], [ [[INC_US_2:%.*]], [[FOR_BODY4_US_2]] ]
-; CHECK-NEXT:    [[NARROW16:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30
-; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[NARROW16]] to i64
+; CHECK-NEXT:    [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_2]], 30
+; CHECK-NEXT:    [[TMP17:%.*]] = zext i32 [[NARROW17]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = icmp ult i32 [[K_013_US_2]], 195
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP18]])
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP17]]
@@ -168,8 +168,8 @@
 ; CHECK-NEXT:    br label [[FOR_BODY4_US_3:%.*]]
 ; CHECK:       for.body4.us.3:
 ; CHECK-NEXT:    [[K_013_US_3:%.*]] = phi i32 [ 0, [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US_2]] ], [ [[INC_US_3:%.*]], [[FOR_BODY4_US_3]] ]
-; CHECK-NEXT:    [[NARROW17:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45
-; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[NARROW17]] to i64
+; CHECK-NEXT:    [[NARROW18:%.*]] = add nuw nsw i32 [[K_013_US_3]], 45
+; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[NARROW18]] to i64
 ; CHECK-NEXT:    [[TMP25:%.*]] = icmp ult i32 [[K_013_US_3]], 180
 ; CHECK-NEXT:    tail call void @llvm.assume(i1 [[TMP25]])
 ; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds <225 x double>, <225 x double>* [[TMP0]], i64 0, i64 [[TMP24]]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hoist-load-of-baseptr.ll
@@ -44,15 +44,15 @@
 ; OLDPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
 ; OLDPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; OLDPM_O2-NEXT:  entry:
-; OLDPM_O2-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
 ; OLDPM_O2-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
+; OLDPM_O2-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
+; OLDPM_O2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; OLDPM_O2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
 ; OLDPM_O2-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
 ; OLDPM_O2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]]
 ; OLDPM_O2-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; OLDPM_O2:       for.cond1.preheader:
 ; OLDPM_O2-NEXT:    [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
-; OLDPM_O2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; OLDPM_O2-NEXT:    br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]]
 ; OLDPM_O2:       for.body4.preheader:
 ; OLDPM_O2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]]
@@ -97,8 +97,9 @@
 ; OLDPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
 ; OLDPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; OLDPM_O3-NEXT:  entry:
-; OLDPM_O3-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
 ; OLDPM_O3-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
+; OLDPM_O3-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
+; OLDPM_O3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; OLDPM_O3-NEXT:    br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
 ; OLDPM_O3:       for.cond1.preheader.us.preheader:
 ; OLDPM_O3-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
@@ -107,7 +108,6 @@
 ; OLDPM_O3-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
 ; OLDPM_O3:       for.cond1.preheader.us:
 ; OLDPM_O3-NEXT:    [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; OLDPM_O3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; OLDPM_O3-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]]
 ; OLDPM_O3:       vector.body:
 ; OLDPM_O3-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
@@ -150,12 +150,12 @@
 ; NEWPM_O1-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
 ; NEWPM_O1-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; NEWPM_O1-NEXT:  entry:
-; NEWPM_O1-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
 ; NEWPM_O1-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
+; NEWPM_O1-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
+; NEWPM_O1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O1-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; NEWPM_O1:       for.cond1.preheader:
 ; NEWPM_O1-NEXT:    [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
-; NEWPM_O1-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O1-NEXT:    br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4:%.*]]
 ; NEWPM_O1:       for.cond.cleanup:
 ; NEWPM_O1-NEXT:    ret void
@@ -176,15 +176,15 @@
 ; NEWPM_O2-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
 ; NEWPM_O2-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; NEWPM_O2-NEXT:  entry:
-; NEWPM_O2-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
 ; NEWPM_O2-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
+; NEWPM_O2-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
+; NEWPM_O2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
 ; NEWPM_O2-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEMS]], -8
 ; NEWPM_O2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEMS]]
 ; NEWPM_O2-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; NEWPM_O2:       for.cond1.preheader:
 ; NEWPM_O2-NEXT:    [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
-; NEWPM_O2-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O2-NEXT:    br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4_PREHEADER:%.*]]
 ; NEWPM_O2:       for.body4.preheader:
 ; NEWPM_O2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_PREHEADER11:%.*]], label [[VECTOR_BODY:%.*]]
@@ -229,8 +229,9 @@
 ; NEWPM_O3-LABEL: define {{[^@]+}}@_Z7computeRSt6vectorIiSaIiEEy
 ; NEWPM_O3-SAME: (%"class.std::vector"* nocapture noundef nonnull readonly align 8 dereferenceable(24) [[DATA:%.*]], i64 noundef [[NUMELEMS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
 ; NEWPM_O3-NEXT:  entry:
-; NEWPM_O3-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
 ; NEWPM_O3-NEXT:    [[CMP26_NOT:%.*]] = icmp eq i64 [[NUMELEMS]], 0
+; NEWPM_O3-NEXT:    [[_M_START_I:%.*]] = getelementptr inbounds %"class.std::vector", %"class.std::vector"* [[DATA]], i64 0, i32 0, i32 0, i32 0, i32 0
+; NEWPM_O3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O3-NEXT:    br i1 [[CMP26_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER_US_PREHEADER:%.*]]
 ; NEWPM_O3:       for.cond1.preheader.us.preheader:
 ; NEWPM_O3-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEMS]], 8
@@ -239,7 +240,6 @@
 ; NEWPM_O3-NEXT:    br label [[FOR_COND1_PREHEADER_US:%.*]]
 ; NEWPM_O3:       for.cond1.preheader.us:
 ; NEWPM_O3-NEXT:    [[I_08_US:%.*]] = phi i64 [ [[INC7_US:%.*]], [[FOR_COND1_FOR_COND_CLEANUP3_CRIT_EDGE_US:%.*]] ], [ 0, [[FOR_COND1_PREHEADER_US_PREHEADER]] ]
-; NEWPM_O3-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[_M_START_I]], align 8
 ; NEWPM_O3-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY4_US_PREHEADER:%.*]], label [[VECTOR_BODY:%.*]]
 ; NEWPM_O3:       vector.body:
 ; NEWPM_O3-NEXT:    [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ], [ 0, [[FOR_COND1_PREHEADER_US]] ]
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/speculation-vs-tbaa.ll
@@ -14,13 +14,15 @@
 define void @licm(double** align 8 dereferenceable(8) %_M_start.i, i64 %numElem) {
 ; OLDPM_O1-LABEL: @licm(
 ; OLDPM_O1-NEXT:  entry:
-; OLDPM_O1-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8
 ; OLDPM_O1-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; OLDPM_O1-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; OLDPM_O1-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; OLDPM_O1:       for.body.lr.ph:
+; OLDPM_O1-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; OLDPM_O1-NEXT:    br label [[FOR_BODY:%.*]]
 ; OLDPM_O1:       for.body:
-; OLDPM_O1-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; OLDPM_O1-NEXT:    [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; OLDPM_O1-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
-; OLDPM_O1-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; OLDPM_O1-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; OLDPM_O1-NEXT:    [[INC]] = add nuw i64 [[K_02]], 1
 ; OLDPM_O1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
 ; OLDPM_O1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
@@ -29,12 +31,12 @@
 ;
 ; OLDPM_O23-LABEL: @licm(
 ; OLDPM_O23-NEXT:  entry:
-; OLDPM_O23-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8
 ; OLDPM_O23-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; OLDPM_O23-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
-; OLDPM_O23:       for.body.preheader:
+; OLDPM_O23-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; OLDPM_O23:       for.body.lr.ph:
+; OLDPM_O23-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
 ; OLDPM_O23-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4
-; OLDPM_O23-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]]
+; OLDPM_O23-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
 ; OLDPM_O23:       vector.ph:
 ; OLDPM_O23-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4
 ; OLDPM_O23-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -42,38 +44,40 @@
 ; OLDPM_O23-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; OLDPM_O23-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]]
 ; OLDPM_O23-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
-; OLDPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; OLDPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; OLDPM_O23-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
 ; OLDPM_O23-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
-; OLDPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]]
+; OLDPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]]
 ; OLDPM_O23-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; OLDPM_O23-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; OLDPM_O23-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; OLDPM_O23-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; OLDPM_O23:       middle.block:
 ; OLDPM_O23-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]]
-; OLDPM_O23-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]]
-; OLDPM_O23:       for.body.preheader3:
-; OLDPM_O23-NEXT:    [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; OLDPM_O23-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]]
+; OLDPM_O23:       for.body.preheader:
+; OLDPM_O23-NEXT:    [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; OLDPM_O23-NEXT:    br label [[FOR_BODY:%.*]]
 ; OLDPM_O23:       for.body:
-; OLDPM_O23-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ]
+; OLDPM_O23-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ]
 ; OLDPM_O23-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
-; OLDPM_O23-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]]
+; OLDPM_O23-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]]
 ; OLDPM_O23-NEXT:    [[INC]] = add nuw i64 [[K_02]], 1
 ; OLDPM_O23-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
-; OLDPM_O23-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; OLDPM_O23-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; OLDPM_O23:       for.cond.cleanup:
 ; OLDPM_O23-NEXT:    ret void
 ;
 ; NEWPM_O1-LABEL: @licm(
 ; NEWPM_O1-NEXT:  entry:
-; NEWPM_O1-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8
 ; NEWPM_O1-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; NEWPM_O1-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
+; NEWPM_O1-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; NEWPM_O1:       for.body.lr.ph:
+; NEWPM_O1-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; NEWPM_O1-NEXT:    br label [[FOR_BODY:%.*]]
 ; NEWPM_O1:       for.body:
-; NEWPM_O1-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
+; NEWPM_O1-NEXT:    [[K_02:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
 ; NEWPM_O1-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
-; NEWPM_O1-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; NEWPM_O1-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; NEWPM_O1-NEXT:    [[INC]] = add nuw i64 [[K_02]], 1
 ; NEWPM_O1-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
 ; NEWPM_O1-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
@@ -82,12 +86,12 @@
 ;
 ; NEWPM_O23-LABEL: @licm(
 ; NEWPM_O23-NEXT:  entry:
-; NEWPM_O23-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8
 ; NEWPM_O23-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i64 [[NUMELEM:%.*]], 0
-; NEWPM_O23-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
-; NEWPM_O23:       for.body.preheader:
+; NEWPM_O23-NEXT:    br i1 [[CMP1_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY_LR_PH:%.*]]
+; NEWPM_O23:       for.body.lr.ph:
+; NEWPM_O23-NEXT:    [[TMP0:%.*]] = load double*, double** [[_M_START_I:%.*]], align 8, !tbaa [[TBAA3:![0-9]+]]
 ; NEWPM_O23-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[NUMELEM]], 4
-; NEWPM_O23-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER3:%.*]], label [[VECTOR_PH:%.*]]
+; NEWPM_O23-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER:%.*]], label [[VECTOR_PH:%.*]]
 ; NEWPM_O23:       vector.ph:
 ; NEWPM_O23-NEXT:    [[N_VEC:%.*]] = and i64 [[NUMELEM]], -4
 ; NEWPM_O23-NEXT:    br label [[VECTOR_BODY:%.*]]
@@ -95,26 +99,26 @@
 ; NEWPM_O23-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; NEWPM_O23-NEXT:    [[TMP1:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[INDEX]]
 ; NEWPM_O23-NEXT:    [[TMP2:%.*]] = bitcast double* [[TMP1]] to <2 x double>*
-; NEWPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA3:![0-9]+]]
+; NEWPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP2]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; NEWPM_O23-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, double* [[TMP1]], i64 2
 ; NEWPM_O23-NEXT:    [[TMP4:%.*]] = bitcast double* [[TMP3]] to <2 x double>*
-; NEWPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA3]]
+; NEWPM_O23-NEXT:    store <2 x double> <double 2.000000e+00, double 2.000000e+00>, <2 x double>* [[TMP4]], align 8, !tbaa [[TBAA8]]
 ; NEWPM_O23-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; NEWPM_O23-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; NEWPM_O23-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; NEWPM_O23-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
 ; NEWPM_O23:       middle.block:
 ; NEWPM_O23-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[NUMELEM]]
-; NEWPM_O23-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER3]]
-; NEWPM_O23:       for.body.preheader3:
-; NEWPM_O23-NEXT:    [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
+; NEWPM_O23-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY_PREHEADER]]
+; NEWPM_O23:       for.body.preheader:
+; NEWPM_O23-NEXT:    [[K_02_PH:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
 ; NEWPM_O23-NEXT:    br label [[FOR_BODY:%.*]]
 ; NEWPM_O23:       for.body:
-; NEWPM_O23-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER3]] ]
+; NEWPM_O23-NEXT:    [[K_02:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[K_02_PH]], [[FOR_BODY_PREHEADER]] ]
 ; NEWPM_O23-NEXT:    [[ADD_PTR_I:%.*]] = getelementptr inbounds double, double* [[TMP0]], i64 [[K_02]]
-; NEWPM_O23-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA3]]
+; NEWPM_O23-NEXT:    store double 2.000000e+00, double* [[ADD_PTR_I]], align 8, !tbaa [[TBAA8]]
 ; NEWPM_O23-NEXT:    [[INC]] = add nuw i64 [[K_02]], 1
 ; NEWPM_O23-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[NUMELEM]]
-; NEWPM_O23-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; NEWPM_O23-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
 ; NEWPM_O23:       for.cond.cleanup:
 ; NEWPM_O23-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
--- a/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/spurious-peeling.ll
@@ -32,10 +32,10 @@
 ; OLDPM_O23-NEXT:    [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; OLDPM_O23-NEXT:    br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; OLDPM_O23:       for.body7.lr.ph.i:
-; OLDPM_O23-NEXT:    [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; OLDPM_O23-NEXT:    [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0
 ; OLDPM_O23-NEXT:    [[TMP2:%.*]] = load float*, float** [[BASE_I6_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; OLDPM_O23-NEXT:    [[ARRAYIDX_I7_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef
+; OLDPM_O23-NEXT:    [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; OLDPM_O23-NEXT:    [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I4_I]], align 8, !tbaa [[TBAA0]]
 ; OLDPM_O23-NEXT:    [[BASE_I2_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0
 ; OLDPM_O23-NEXT:    [[TMP4:%.*]] = load float*, float** [[BASE_I2_I]], align 8, !tbaa [[TBAA8]]
@@ -64,10 +64,10 @@
 ; NEWPM_O1-NEXT:    [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; NEWPM_O1-NEXT:    br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; NEWPM_O1:       for.body7.lr.ph.i:
-; NEWPM_O1-NEXT:    [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; NEWPM_O1-NEXT:    [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0
 ; NEWPM_O1-NEXT:    [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; NEWPM_O1-NEXT:    [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef
+; NEWPM_O1-NEXT:    [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; NEWPM_O1-NEXT:    [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]]
 ; NEWPM_O1-NEXT:    [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0
 ; NEWPM_O1-NEXT:    [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]]
@@ -95,10 +95,10 @@
 ; NEWPM_O23-NEXT:    [[CMP510_NOT_I:%.*]] = icmp eq i32 [[TMP1]], 0
 ; NEWPM_O23-NEXT:    br i1 [[CMP510_NOT_I]], label [[_ZN12FLOATVECPAIR6VECINCEV_EXIT:%.*]], label [[FOR_BODY7_LR_PH_I:%.*]]
 ; NEWPM_O23:       for.body7.lr.ph.i:
-; NEWPM_O23-NEXT:    [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; NEWPM_O23-NEXT:    [[BASE_I4_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP0]], i64 undef, i32 0
 ; NEWPM_O23-NEXT:    [[TMP2:%.*]] = load float*, float** [[BASE_I4_I]], align 8, !tbaa [[TBAA8:![0-9]+]]
 ; NEWPM_O23-NEXT:    [[ARRAYIDX_I5_I:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 undef
+; NEWPM_O23-NEXT:    [[BASE_I6_I:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[FVP]], i64 0, i32 0, i32 0
 ; NEWPM_O23-NEXT:    [[TMP3:%.*]] = load %class.HomemadeVector.0*, %class.HomemadeVector.0** [[BASE_I6_I]], align 8, !tbaa [[TBAA0]]
 ; NEWPM_O23-NEXT:    [[BASE_I8_I:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0]], %class.HomemadeVector.0* [[TMP3]], i64 undef, i32 0
 ; NEWPM_O23-NEXT:    [[TMP4:%.*]] = load float*, float** [[BASE_I8_I]], align 8, !tbaa [[TBAA8]]
@@ -130,16 +130,18 @@
 ; OLDPM_O1-SAME: (%class.FloatVecPair* [[THIS:%.*]]) local_unnamed_addr comdat align 2 {
 ; OLDPM_O1-NEXT:  entry:
 ; OLDPM_O1-NEXT:    [[VSRC23:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR:%.*]], %class.FloatVecPair* [[THIS]], i64 0, i32 1
-; OLDPM_O1-NEXT:    [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0
 ; OLDPM_O1-NEXT:    [[CALL2:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]])
 ; OLDPM_O1-NEXT:    [[SIZE43:%.*]] = getelementptr inbounds [[CLASS_HOMEMADEVECTOR_0:%.*]], %class.HomemadeVector.0* [[CALL2]], i64 0, i32 1
 ; OLDPM_O1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[SIZE43]], align 8, !tbaa [[TBAA0:![0-9]+]]
 ; OLDPM_O1-NEXT:    [[CMP54_NOT:%.*]] = icmp eq i32 [[TMP0]], 0
-; OLDPM_O1-NEXT:    br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7:%.*]]
+; OLDPM_O1-NEXT:    br i1 [[CMP54_NOT]], label [[FOR_COND_CLEANUP6:%.*]], label [[FOR_BODY7_LR_PH:%.*]]
+; OLDPM_O1:       for.body7.lr.ph:
+; OLDPM_O1-NEXT:    [[VSRCDST:%.*]] = getelementptr inbounds [[CLASS_FLOATVECPAIR]], %class.FloatVecPair* [[THIS]], i64 0, i32 0
+; OLDPM_O1-NEXT:    br label [[FOR_BODY7:%.*]]
 ; OLDPM_O1:       for.cond.cleanup6:
 ; OLDPM_O1-NEXT:    ret void
 ; OLDPM_O1:       for.body7:
-; OLDPM_O1-NEXT:    [[J_05:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY7]] ], [ 0, [[ENTRY:%.*]] ]
+; OLDPM_O1-NEXT:    [[J_05:%.*]] = phi i32 [ 0, [[FOR_BODY7_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY7]] ]
 ; OLDPM_O1-NEXT:    [[CALL9:%.*]] = call %class.HomemadeVector.0* @_ZN14HomemadeVectorIS_IfLj8EELj8EEixEj(%class.HomemadeVector* nonnull [[VSRC23]])
 ; OLDPM_O1-NEXT:    [[CALL10:%.*]] = call float* @_ZN14HomemadeVectorIfLj8EEixEj(%class.HomemadeVector.0* [[CALL9]])
 ; OLDPM_O1-NEXT:    [[TMP1:%.*]] = load float, float* [[CALL10]], align 4, !tbaa [[TBAA6:![0-9]+]]
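
[Editor's sketch, not part of the patch: the legacy pass manager gets the same
control through the three-argument createLICMPass overload added in Scalar.h.
The hypothetical driver code below shows the call; addConservativeLICM is an
invented helper name.]

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/Scalar.h"

// Adds LICM without speculative hoisting, matching the pre-rotation pipeline
// position above; the rotated-loop call sites in this patch pass
// /*AllowSpeculation=*/true instead.
void addConservativeLICM(llvm::legacy::PassManagerBase &PM,
                         unsigned MssaOptCap, unsigned MssaPromotionCap) {
  PM.add(llvm::createLICMPass(MssaOptCap, MssaPromotionCap,
                              /*AllowSpeculation=*/false));
}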