diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -36,6 +36,7 @@
 class OptimizationRemarkEmitter;
 class PredIteratorCache;
 class ScalarEvolution;
+class ScalarEvolutionExpander;
 class SCEV;
 class SCEVExpander;
 class TargetLibraryInfo;
@@ -471,7 +472,7 @@
 std::pair<Instruction *, Instruction *>
 addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
                  const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
-                 ScalarEvolution *SE);
+                 SCEVExpander &Expander);
 
 } // end namespace llvm
 
diff --git a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
--- a/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Transforms/Utils/ScalarEvolutionExpander.h
@@ -16,6 +16,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/ScalarEvolutionNormalization.h"
@@ -199,6 +200,8 @@
     ChainedPhis.clear();
   }
 
+  ScalarEvolution *getSE() { return &SE; }
+
   /// Return a vector containing all instructions inserted during expansion.
   SmallVector<Instruction *, 32> getAllInsertedInstructions() const {
     SmallVector<Instruction *, 32> Result;
@@ -511,10 +514,12 @@
   SCEVExpanderCleaner(SCEVExpander &Expander, DominatorTree &DT)
       : Expander(Expander), DT(DT), ResultUsed(false) {}
 
-  ~SCEVExpanderCleaner();
+  ~SCEVExpanderCleaner() { cleanup(); }
 
   /// Indicate that the result of the expansion is used.
   void markResultUsed() { ResultUsed = true; }
+
+  void cleanup();
 };
 } // namespace llvm
 
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1569,7 +1569,8 @@
 /// in \p TheLoop.  \return the values for the bounds.
 static PointerBounds expandBounds(const RuntimeCheckingPtrGroup *CG,
                                   Loop *TheLoop, Instruction *Loc,
-                                  SCEVExpander &Exp, ScalarEvolution *SE) {
+                                  SCEVExpander &Exp) {
+  ScalarEvolution *SE = Exp.getSE();
   // TODO: Add helper to retrieve pointers to CG.
   Value *Ptr = CG->RtCheck.Pointers[CG->Members[0]].PointerValue;
   const SCEV *Sc = SE->getSCEV(Ptr);
@@ -1608,16 +1609,15 @@
 /// lower bounds for both pointers in the check.
 static SmallVector<std::pair<PointerBounds, PointerBounds>, 4>
 expandBounds(const SmallVectorImpl<RuntimePointerCheck> &PointerChecks, Loop *L,
-             Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp) {
+             Instruction *Loc, SCEVExpander &Exp) {
   SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds;
 
   // Here we're relying on the SCEV Expander's cache to only emit code for the
   // same bounds once.
   transform(PointerChecks, std::back_inserter(ChecksWithBounds),
             [&](const RuntimePointerCheck &Check) {
-              PointerBounds First = expandBounds(Check.first, L, Loc, Exp, SE),
-                            Second =
-                                expandBounds(Check.second, L, Loc, Exp, SE);
+              PointerBounds First = expandBounds(Check.first, L, Loc, Exp),
+                            Second = expandBounds(Check.second, L, Loc, Exp);
               return std::make_pair(First, Second);
             });
 
@@ -1627,12 +1627,10 @@
 std::pair<Instruction *, Instruction *> llvm::addRuntimeChecks(
     Instruction *Loc, Loop *TheLoop,
     const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
-    ScalarEvolution *SE) {
+    SCEVExpander &Exp) {
   // TODO: Move noalias annotation code from LoopVersioning here and share with LV if possible.
   // TODO: Pass  RtPtrChecking instead of PointerChecks and SE separately, if possible
-  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
-  SCEVExpander Exp(*SE, DL, "induction");
-  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp);
+  auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, Exp);
 
   LLVMContext &Ctx = Loc->getContext();
   Instruction *FirstInst = nullptr;
diff --git a/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
--- a/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -60,9 +60,12 @@
   // Add the memcheck in the original preheader (this is empty initially).
   BasicBlock *RuntimeCheckBB = VersionedLoop->getLoopPreheader();
   const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
-  std::tie(FirstCheckInst, MemRuntimeCheck) =
-      addRuntimeChecks(RuntimeCheckBB->getTerminator(), VersionedLoop,
-                       AliasChecks, RtPtrChecking.getSE());
+
+  SCEVExpander Exp2(*RtPtrChecking.getSE(),
+                    VersionedLoop->getHeader()->getModule()->getDataLayout(),
+                    "induction");
+  std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
+      RuntimeCheckBB->getTerminator(), VersionedLoop, AliasChecks, Exp2);
 
   SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(),
                    "scev.check");
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2702,7 +2702,7 @@
   return false;
 }
 
-SCEVExpanderCleaner::~SCEVExpanderCleaner() {
+void SCEVExpanderCleaner::cleanup() {
   // Result is used, nothing to remove.
   if (ResultUsed)
     return;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -431,9 +431,8 @@
 
   return None;
 }
-
+struct GeneratedRTChecks;
 namespace llvm {
-
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
 /// block to a specified vectorization factor (VF).
 /// This class performs the widening of scalars into vectors, or multiple
@@ -457,12 +456,12 @@
                       OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
                       unsigned UnrollFactor, LoopVectorizationLegality *LVL,
                       LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
-                      ProfileSummaryInfo *PSI)
+                      ProfileSummaryInfo *PSI, GeneratedRTChecks &Check)
       : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
         AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
         Builder(PSE.getSE()->getContext()),
         VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM),
-        BFI(BFI), PSI(PSI) {
+        BFI(BFI), PSI(PSI), Check(Check) {
     // Query this against the original loop and save it here because the profile
     // of the original loop header may change as the transformation happens.
     OptForSizeBasedOnProfile = llvm::shouldOptimizeForSize(
@@ -719,10 +718,10 @@
 
   /// Emit a bypass check to see if all of the SCEV assumptions we've
   /// had to make are correct.
-  void emitSCEVChecks(Loop *L, BasicBlock *Bypass);
+  BasicBlock *emitSCEVChecks(Loop *L, BasicBlock *Bypass);
 
   /// Emit bypass checks to check any memory assumptions we may have made.
-  void emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
+  BasicBlock *emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass);
 
   /// Compute the transformed value of Index at offset StartValue using step
   /// StepValue.
@@ -895,6 +894,10 @@
   // Whether this loop should be optimized for size based on profile guided size
   // optimizatios.
   bool OptForSizeBasedOnProfile;
+
+  /// Structure to hold information about generated runtime checks, responsible
+  /// for cleaning the checks, if they turn out unprofitable.
+  GeneratedRTChecks &Check;
 };
 
 class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -906,10 +909,10 @@
                     OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
                     LoopVectorizationLegality *LVL,
                     LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
-                    ProfileSummaryInfo *PSI)
+                    ProfileSummaryInfo *PSI, GeneratedRTChecks &Check)
       : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
                             ElementCount::getFixed(1), UnrollFactor, LVL, CM,
-                            BFI, PSI) {}
+                            BFI, PSI, Check) {}
 
 private:
   Value *getBroadcastInstrs(Value *V) override;
@@ -958,9 +961,11 @@
       const TargetTransformInfo *TTI, AssumptionCache *AC,
       OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
       LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
-      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+      GeneratedRTChecks &Checks)
       : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
-                            EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI),
+                            EPI.MainLoopVF, EPI.MainLoopUF, LVL, CM, BFI, PSI,
+                            Checks),
         EPI(EPI) {}
 
   // Override this function to handle the more complex control flow around the
@@ -994,9 +999,10 @@
       const TargetTransformInfo *TTI, AssumptionCache *AC,
       OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
       LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
-      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+      GeneratedRTChecks &Check)
       : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
-                                       EPI, LVL, CM, BFI, PSI) {}
+                                       EPI, LVL, CM, BFI, PSI, Check) {}
   /// Implements the interface for creating a vectorized skeleton using the
   /// *main loop* strategy (ie the first pass of vplan execution).
   BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
@@ -1016,17 +1022,16 @@
 // their epilogues.
 class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
 public:
-  EpilogueVectorizerEpilogueLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
-                    LoopInfo *LI, DominatorTree *DT,
-                    const TargetLibraryInfo *TLI,
-                    const TargetTransformInfo *TTI, AssumptionCache *AC,
-                    OptimizationRemarkEmitter *ORE,
-                    EpilogueLoopVectorizationInfo &EPI,
-                    LoopVectorizationLegality *LVL,
-                    llvm::LoopVectorizationCostModel *CM,
-                    BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI)
+  EpilogueVectorizerEpilogueLoop(
+      Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
+      DominatorTree *DT, const TargetLibraryInfo *TLI,
+      const TargetTransformInfo *TTI, AssumptionCache *AC,
+      OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
+      LoopVectorizationLegality *LVL, llvm::LoopVectorizationCostModel *CM,
+      BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+      GeneratedRTChecks &Checks)
       : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE,
-                                       EPI, LVL, CM, BFI, PSI) {}
+                                       EPI, LVL, CM, BFI, PSI, Checks) {}
   /// Implements the interface for creating a vectorized skeleton using the
   /// *epilogue loop* strategy (ie the second pass of vplan execution).
   BasicBlock *createEpilogueVectorizedLoopSkeleton() final override;
@@ -1839,9 +1844,99 @@
   /// Profitable vector factors.
   SmallVector<VectorizationFactor, 8> ProfitableVFs;
 };
-
 } // end namespace llvm
 
+/// Helper struct to manage generating runtime checks for vectorization.
+///
+/// The runtime checks are created up-front in a temporary block to allow better
+/// estimating the cost and un-linked from the existing IR. After deciding to
+/// vectorize, the checks are moved backed. If deciding not to vectorize, the
+/// temporary block is completely removed.
+struct GeneratedRTChecks {
+  BasicBlock *TmpBlock = nullptr;
+  BasicBlock *Preheader;
+  Value *SCEVCheck;
+  Instruction *FirstCheckInst = nullptr;
+  Instruction *MemRuntimeCheck = nullptr;
+
+  ScalarEvolution &SE;
+  DominatorTree *DT;
+
+  SCEVExpander Exp;
+  SCEVExpanderCleaner Cleaner;
+
+  GeneratedRTChecks(BasicBlock *Preheader, ScalarEvolution &SE,
+                    DominatorTree *DT)
+      : Preheader(Preheader), SE(SE), DT(DT),
+        Exp(SE, Preheader->getModule()->getDataLayout(), "scev.check"),
+        Cleaner(Exp, *DT) {}
+
+  /// Generate runtime checks in temporary block (TmpBlock), so we can
+  /// accurately estimate the cost of the runtime checks. The block is un-linked
+  /// from the IR and is added back during vector code generation. If there is
+  /// no vector code generation, the check blocks is removed completely.
+  void Create(Loop *L, const LoopAccessInfo &LAI,
+              const SCEVUnionPredicate &UnionPred, LoopInfo *LI) {
+    BasicBlock *LoopHeader = Preheader->getSingleSuccessor();
+    TmpBlock = SplitBlock(Preheader, Preheader->getTerminator(), DT, LI,
+                          nullptr, "tmp.rtchecks");
+
+    SCEVCheck =
+        Exp.expandCodeForPredicate(&UnionPred, TmpBlock->getTerminator());
+
+    const auto &RtPtrChecking = *LAI.getRuntimePointerChecking();
+    if (RtPtrChecking.Need) {
+      std::tie(FirstCheckInst, MemRuntimeCheck) = addRuntimeChecks(
+          TmpBlock->getTerminator(), L, RtPtrChecking.getChecks(), Exp);
+      assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
+                                "claimed checks are required");
+    }
+
+    // Unhook the temporary block with the checks, update various places
+    // accordingly.
+    TmpBlock->replaceAllUsesWith(Preheader);
+    TmpBlock->getTerminator()->moveBefore(Preheader->getTerminator());
+    Preheader->getTerminator()->eraseFromParent();
+    DT->changeImmediateDominator(LoopHeader, Preheader);
+    DT->eraseNode(TmpBlock);
+    LI->removeBlock(TmpBlock);
+  }
+
+  ~GeneratedRTChecks() {
+    if (!TmpBlock) {
+      Cleaner.markResultUsed();
+      return;
+    }
+
+    if (!SCEVCheck && TmpBlock->empty()) {
+      Cleaner.markResultUsed();
+      TmpBlock->eraseFromParent();
+      return;
+    }
+
+    if (MemRuntimeCheck && !isa<Constant>(MemRuntimeCheck))
+      MemRuntimeCheck->replaceAllUsesWith(
+          ConstantInt::getFalse(MemRuntimeCheck->getType()->getContext()));
+    if (SCEVCheck && !isa<Constant>(SCEVCheck))
+      SCEVCheck->replaceAllUsesWith(
+          ConstantInt::getFalse(SCEVCheck->getType()->getContext()));
+
+    SmallPtrSet<Value *, 8> Removed;
+    // Completely remove the block.
+    for (auto &I : make_early_inc_range(reverse(*TmpBlock))) {
+      if (Exp.isInsertedInstruction(&I))
+        continue;
+      SE.forgetValue(&I);
+      SE.eraseValueFromMap(&I);
+      Removed.insert(&I);
+      I.eraseFromParent();
+    }
+
+    Cleaner.cleanup();
+    TmpBlock->eraseFromParent();
+  }
+};
+
 // Return true if \p OuterLp is an outer loop annotated with hints for explicit
 // vectorization. The loop needs to be annotated with #pragma omp simd
 // simdlen(#) or #pragma clang vectorize(enable) vectorize_width(#). If the
@@ -3100,22 +3195,16 @@
   LoopBypassBlocks.push_back(TCCheckBlock);
 }
 
-void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
+BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
   // Reuse existing vector loop preheader for SCEV checks.
   // Note that new preheader block is generated for vector loop.
-  BasicBlock *const SCEVCheckBlock = LoopVectorPreHeader;
+  BasicBlock *const SCEVCheckBlock = Check.TmpBlock;
 
-  // Generate the code to check that the SCEV assumptions that we made.
-  // We want the new basic block to start at the first instruction in a
-  // sequence of instructions that form a check.
-  SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(),
-                   "scev.check");
-  Value *SCEVCheck = Exp.expandCodeForPredicate(
-      &PSE.getUnionPredicate(), SCEVCheckBlock->getTerminator());
-
-  if (auto *C = dyn_cast<ConstantInt>(SCEVCheck))
+  if (!Check.TmpBlock)
+    return nullptr;
+  if (auto *C = dyn_cast<ConstantInt>(Check.SCEVCheck))
     if (C->isZero())
-      return;
+      return nullptr;
 
   assert(!(SCEVCheckBlock->getParent()->hasOptSize() ||
            (OptForSizeBasedOnProfile &&
@@ -3123,10 +3212,30 @@
          "Cannot SCEV check stride or overflow when optimizing for size");
 
   SCEVCheckBlock->setName("vector.scevcheck");
+
+  auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
+
+  BranchInst::Create(LoopVectorPreHeader, Check.TmpBlock);
   // Create new preheader for vector loop.
-  LoopVectorPreHeader =
-      SplitBlock(SCEVCheckBlock, SCEVCheckBlock->getTerminator(), DT, LI,
-                 nullptr, "vector.ph");
+  Check.TmpBlock = SplitBlock(SCEVCheckBlock,
+                              cast<Instruction>(Check.SCEVCheck)->getNextNode(),
+                              nullptr, nullptr, nullptr, "");
+
+  if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
+    PL->addBasicBlockToLoop(SCEVCheckBlock, *LI);
+
+  Check.TmpBlock->replaceAllUsesWith(SCEVCheckBlock);
+  Check.TmpBlock->getTerminator()->moveBefore(SCEVCheckBlock->getTerminator());
+  SCEVCheckBlock->getTerminator()->eraseFromParent();
+  SCEVCheckBlock->moveBefore(LoopVectorPreHeader);
+
+  auto *PHTerm = Pred->getTerminator();
+  for (unsigned i = 0; i < PHTerm->getNumSuccessors(); i++)
+    if (PHTerm->getSuccessor(i) == LoopVectorPreHeader)
+      PHTerm->setSuccessor(i, SCEVCheckBlock);
+
+  DT->addNewBlock(SCEVCheckBlock, Pred);
+  DT->changeImmediateDominator(LoopVectorPreHeader, SCEVCheckBlock);
 
   // Update dominator only if this is first RT check.
   if (LoopBypassBlocks.empty()) {
@@ -3136,27 +3245,28 @@
 
   ReplaceInstWithInst(
       SCEVCheckBlock->getTerminator(),
-      BranchInst::Create(Bypass, LoopVectorPreHeader, SCEVCheck));
+      BranchInst::Create(Bypass, LoopVectorPreHeader, Check.SCEVCheck));
   LoopBypassBlocks.push_back(SCEVCheckBlock);
   AddedSafetyChecks = true;
+  Check.SCEVCheck = nullptr;
+  return SCEVCheckBlock;
 }
 
-void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) {
+BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L,
+                                                      BasicBlock *Bypass) {
   // VPlan-native path does not do any analysis for runtime checks currently.
   if (EnableVPlanNativePath)
-    return;
+    return nullptr;
 
   // Reuse existing vector loop preheader for runtime memory checks.
   // Note that new preheader block is generated for vector loop.
-  BasicBlock *const MemCheckBlock = L->getLoopPreheader();
+  BasicBlock *const MemCheckBlock = Check.TmpBlock;
 
-  // Generate the code that checks in runtime if arrays overlap. We put the
-  // checks into a separate block to make the more common case of few elements
-  // faster.
-  auto *LAI = Legal->getLAI();
-  const auto &RtPtrChecking = *LAI->getRuntimePointerChecking();
-  if (!RtPtrChecking.Need)
-    return;
+  // Check if we generated code that checks in runtime if arrays overlap. We put
+  // the checks into a separate block to make the more common case of few
+  // elements faster.
+  if (!Check.MemRuntimeCheck)
+    return nullptr;
 
   if (MemCheckBlock->getParent()->hasOptSize() || OptForSizeBasedOnProfile) {
     assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled &&
@@ -3172,11 +3282,22 @@
     });
   }
 
+  auto *Pred = LoopVectorPreHeader->getSinglePredecessor();
+  auto *PHTerm = Pred->getTerminator();
+  for (unsigned i = 0; i < PHTerm->getNumSuccessors(); i++)
+    if (PHTerm->getSuccessor(i) == LoopVectorPreHeader)
+      PHTerm->setSuccessor(i, Check.TmpBlock);
+  auto *BI = BranchInst::Create(LoopVectorPreHeader, Check.TmpBlock);
+  BI->setDebugLoc(PHTerm->getDebugLoc());
+
+  DT->addNewBlock(Check.TmpBlock, Pred);
+  DT->changeImmediateDominator(LoopVectorPreHeader, MemCheckBlock);
+  Check.TmpBlock->moveBefore(LoopVectorPreHeader);
+
+  if (auto *PL = LI->getLoopFor(LoopVectorPreHeader))
+    PL->addBasicBlockToLoop(Check.TmpBlock, *LI);
+
   MemCheckBlock->setName("vector.memcheck");
-  // Create new preheader for vector loop.
-  LoopVectorPreHeader =
-      SplitBlock(MemCheckBlock, MemCheckBlock->getTerminator(), DT, LI, nullptr,
-                 "vector.ph");
 
   auto *CondBranch = cast<BranchInst>(
       Builder.CreateCondBr(Builder.getTrue(), Bypass, LoopVectorPreHeader));
@@ -3189,15 +3310,12 @@
     DT->changeImmediateDominator(Bypass, MemCheckBlock);
     DT->changeImmediateDominator(LoopExitBlock, MemCheckBlock);
   }
+  Check.TmpBlock = nullptr;
 
-  Instruction *FirstCheckInst;
-  Instruction *MemRuntimeCheck;
-  std::tie(FirstCheckInst, MemRuntimeCheck) =
-      addRuntimeChecks(MemCheckBlock->getTerminator(), OrigLoop,
-                       RtPtrChecking.getChecks(), RtPtrChecking.getSE());
-  assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking "
-                            "claimed checks are required");
-  CondBranch->setCondition(MemRuntimeCheck);
+  ReplaceInstWithInst(
+      MemCheckBlock->getTerminator(),
+      BranchInst::Create(Bypass, LoopVectorPreHeader, Check.MemRuntimeCheck));
+  AddedSafetyChecks = true;
 
   // We currently don't use LoopVersioning for the actual loop cloning but we
   // still use it to add the noalias metadata.
@@ -3206,6 +3324,7 @@
       Legal->getLAI()->getRuntimePointerChecking()->getChecks(), OrigLoop, LI,
       DT, PSE.getSE());
   LVer->prepareNoAliasMetadata();
+  return MemCheckBlock;
 }
 
 Value *InnerLoopVectorizer::emitTransformedIndex(
@@ -7640,22 +7759,12 @@
 
   // Generate the code to check any assumptions that we've made for SCEV
   // expressions.
-  BasicBlock *SavedPreHeader = LoopVectorPreHeader;
-  emitSCEVChecks(Lp, LoopScalarPreHeader);
-
-  // If a safety check was generated save it.
-  if (SavedPreHeader != LoopVectorPreHeader)
-    EPI.SCEVSafetyCheck = SavedPreHeader;
+  EPI.SCEVSafetyCheck = emitSCEVChecks(Lp, LoopScalarPreHeader);
 
   // Generate the code that checks at runtime if arrays overlap. We put the
   // checks into a separate block to make the more common case of few elements
   // faster.
-  SavedPreHeader = LoopVectorPreHeader;
-  emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
-
-  // If a safety check was generated save/overwite it.
-  if (SavedPreHeader != LoopVectorPreHeader)
-    EPI.MemSafetyCheck = SavedPreHeader;
+  EPI.MemSafetyCheck = emitMemRuntimeChecks(Lp, LoopScalarPreHeader);
 
   // Generate the iteration count check for the main loop, *after* the check
   // for the epilogue loop, so that the path-length is shorter for the case
@@ -8987,8 +9096,9 @@
 
   LVP.setBestPlan(VF.Width, 1);
 
+  GeneratedRTChecks Checks(L->getLoopPreheader(), *PSE.getSE(), DT);
   InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL,
-                         &CM, BFI, PSI);
+                         &CM, BFI, PSI, Checks);
   LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
                     << L->getHeader()->getParent()->getName() << "\"\n");
   LVP.executePlan(LB, DT);
@@ -8996,7 +9106,6 @@
   // Mark the loop as already vectorized to avoid vectorizing again.
   Hints.setAlreadyVectorized();
 
-  assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()));
   return true;
 }
 
@@ -9153,6 +9262,11 @@
     IC = CM.selectInterleaveCount(VF.Width, VF.Cost);
   }
 
+  // Optimistically generate runtime checks. Drop them if they turn out to not
+  // be profitable.
+  GeneratedRTChecks Checks(L->getLoopPreheader(), *PSE.getSE(), DT);
+  Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate(), LI);
+
   // Identify the diagnostic messages that should be produced.
   std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
   bool VectorizeLoop = true, InterleaveLoop = true;
@@ -9253,7 +9367,7 @@
     // If we decided that it is not legal to vectorize the loop, then
     // interleave it.
     InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM,
-                               BFI, PSI);
+                               BFI, PSI, Checks);
     LVP.executePlan(Unroller, DT);
 
     ORE->emit([&]() {
@@ -9276,7 +9390,7 @@
       EpilogueLoopVectorizationInfo EPI(VF.Width.getKnownMinValue(), IC,
                                         EpilogueVF.Width.getKnownMinValue(), 1);
       EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI,
-                                         &LVL, &CM, BFI, PSI);
+                                         &LVL, &CM, BFI, PSI, Checks);
 
       LVP.setBestPlan(EPI.MainLoopVF, EPI.MainLoopUF);
       LVP.executePlan(MainILV, DT);
@@ -9290,8 +9404,8 @@
       LVP.setBestPlan(EPI.EpilogueVF, EPI.EpilogueUF);
       EPI.MainLoopVF = EPI.EpilogueVF;
       EPI.MainLoopUF = EPI.EpilogueUF;
-      EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
-                                               ORE, EPI, &LVL, &CM, BFI, PSI);
+      EpilogueVectorizerEpilogueLoop EpilogILV(
+          L, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, &LVL, &CM, BFI, PSI, Checks);
       LVP.executePlan(EpilogILV, DT);
       ++LoopsEpilogueVectorized;
 
@@ -9299,7 +9413,7 @@
         DisableRuntimeUnroll = true;
     } else {
       InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
-                             &LVL, &CM, BFI, PSI);
+                             &LVL, &CM, BFI, PSI, Checks);
       LVP.executePlan(LB, DT);
       ++LoopsVectorized;
 
@@ -9333,7 +9447,6 @@
     Hints.setAlreadyVectorized();
   }
 
-  assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()));
   return true;
 }
 
@@ -9399,6 +9512,8 @@
     Changed |= CFGChanged |= processLoop(L);
   }
 
+  assert(!Changed || !verifyFunction(F, &dbgs()));
+
   // Process each loop nest in the function.
   return LoopVectorizeResult(Changed, CFGChanged);
 }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/illegal-parallel-loop-uniform-write.ll
@@ -26,9 +26,9 @@
 ; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_BODY3_LR_PH_US_PREHEADER:%.*]], label [[FOR_END15:%.*]]
 ; CHECK:       for.body3.lr.ph.us.preheader:
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[M]], -1
-; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[K:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[K:%.*]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
 ; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH_US:%.*]]
 ; CHECK:       for.end.us:
 ; CHECK-NEXT:    [[ARRAYIDX9_US:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDVARS_IV33:%.*]]
@@ -54,12 +54,12 @@
 ; CHECK-NEXT:    br i1 [[EXITCOND32]], label [[FOR_END_US:%.*]], label [[FOR_BODY3_US]], !llvm.loop !3
 ; CHECK:       for.body3.lr.ph.us:
 ; CHECK-NEXT:    [[INDVARS_IV33]] = phi i64 [ [[INDVARS_IV_NEXT34]], [[FOR_END_US]] ], [ 0, [[FOR_BODY3_LR_PH_US_PREHEADER]] ]
-; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP3]], [[INDVARS_IV33]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP1]], [[INDVARS_IV33]]
 ; CHECK-NEXT:    [[TMP8:%.*]] = trunc i64 [[TMP7]] to i32
 ; CHECK-NEXT:    [[TMP9:%.*]] = trunc i64 [[INDVARS_IV33]] to i32
 ; CHECK-NEXT:    [[ADD_US]] = add i32 [[TMP9]], [[K]]
 ; CHECK-NEXT:    [[ARRAYIDX7_US]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV33]]
-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH]], label [[VECTOR_SCEVCHECK:%.*]]
 ; CHECK:       vector.scevcheck:
 ; CHECK-NEXT:    [[MUL:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 1, i32 [[TMP0]])
@@ -74,8 +74,8 @@
 ; CHECK-NEXT:    [[TMP16:%.*]] = or i1 false, [[TMP15]]
 ; CHECK-NEXT:    br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
+; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -100,7 +100,7 @@
 ; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !5
 ; CHECK:       middle.block:
-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_US]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
 ; CHECK-NEXT:    [[BC_RESUME_VAL]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY3_LR_PH_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll b/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/runtime-drop-crash.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 %s | FileCheck %s
+
+%struct.foo = type { [400 x double] }
+
+; Make sure we do not crash when dropping runtime checks.
+
+; CHECK-NOT: vector.body
+
+define void @barney(%struct.foo* %ptr) {
+entry:
+  br label %loop
+
+loop:
+  %tmp3 = phi i64 [ 0, %entry ], [ %tmp18, %loop ]
+  %tmp4 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 undef
+  %tmp5 = bitcast %struct.foo* %tmp4 to i64*
+  store i64 0, i64* %tmp5, align 8
+  %tmp8 = add i64 1, %tmp3
+  %tmp10 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 %tmp8
+  %tmp11 = bitcast %struct.foo* %tmp10 to i64*
+  store i64 1, i64* %tmp11, align 8
+  %tmp14 = add i64 undef, %tmp3
+  %tmp16 = getelementptr inbounds %struct.foo, %struct.foo* %ptr, i64 %tmp14
+  %tmp17 = bitcast %struct.foo* %tmp16 to i64*
+  store i64 2, i64* %tmp17, align 8
+  %tmp18 = add nuw nsw i64 %tmp3, 4
+  %c = icmp ult i64 %tmp18, 400
+  br i1 %c, label %exit, label %loop
+
+exit:
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
--- a/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/skeleton-lcssa-crash.ll
@@ -23,22 +23,22 @@
 ; CHECK-NEXT:    [[C_3:%.*]] = call i1 @cond()
 ; CHECK-NEXT:    br i1 [[C_3]], label [[LOOP3_PREHEADER:%.*]], label [[INNER_LATCH:%.*]]
 ; CHECK:       loop3.preheader:
-; CHECK-NEXT:    [[L_1_LCSSA11:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
+; CHECK-NEXT:    [[L_1_LCSSA8:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
 ; CHECK-NEXT:    [[L_1_LCSSA:%.*]] = phi i16* [ [[L_1]], [[INNER_BB]] ]
 ; CHECK-NEXT:    [[L_2_LCSSA:%.*]] = phi i16* [ [[L_2]], [[INNER_BB]] ]
-; CHECK-NEXT:    [[L_2_LCSSA4:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8*
+; CHECK-NEXT:    [[L_2_LCSSA3:%.*]] = bitcast i16* [[L_2_LCSSA]] to i8*
 ; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], 1
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 2
 ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
 ; CHECK:       vector.memcheck:
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA4]], i64 1
+; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, i8* [[L_2_LCSSA3]], i64 1
 ; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i16, i16* [[L_1_LCSSA]], i64 1
-; CHECK-NEXT:    [[SCEVGEP9:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
+; CHECK-NEXT:    [[SCEVGEP6:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[N]], 2
-; CHECK-NEXT:    [[SCEVGEP10:%.*]] = getelementptr i16, i16* [[L_1_LCSSA11]], i64 [[TMP1]]
-; CHECK-NEXT:    [[SCEVGEP1013:%.*]] = bitcast i16* [[SCEVGEP10]] to i8*
-; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA4]], [[SCEVGEP1013]]
-; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP9]], [[UGLYGEP]]
+; CHECK-NEXT:    [[SCEVGEP7:%.*]] = getelementptr i16, i16* [[L_1_LCSSA8]], i64 [[TMP1]]
+; CHECK-NEXT:    [[SCEVGEP710:%.*]] = bitcast i16* [[SCEVGEP7]] to i8*
+; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[L_2_LCSSA3]], [[SCEVGEP710]]
+; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP6]], [[UGLYGEP]]
 ; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
 ; CHECK-NEXT:    [[MEMCHECK_CONFLICT:%.*]] = and i1 [[FOUND_CONFLICT]], true
 ; CHECK-NEXT:    br i1 [[MEMCHECK_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
@@ -66,8 +66,6 @@
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[L_18:%.*]] = phi i16* [ [[L_1_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[L_1_LCSSA]], [[LOOP3_PREHEADER]] ], [ [[L_1_LCSSA]], [[MIDDLE_BLOCK]] ]
-; CHECK-NEXT:    [[L_23:%.*]] = phi i16* [ [[L_2_LCSSA]], [[VECTOR_MEMCHECK]] ], [ [[L_2_LCSSA]], [[LOOP3_PREHEADER]] ], [ [[L_2_LCSSA]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP3_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
 ; CHECK-NEXT:    br label [[LOOP3:%.*]]
 ; CHECK:       inner.latch:
@@ -79,20 +77,19 @@
 ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
 ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
 ; CHECK-NEXT:    [[C_5:%.*]] = icmp ult i64 [[IV]], [[N]]
-; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_18]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[L_1_LCSSA]], i64 [[IV_NEXT]]
 ; CHECK-NEXT:    [[LOOP_L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
-; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_23]], i64 0
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[L_2_LCSSA]], i64 0
 ; CHECK-NEXT:    store i16 [[LOOP_L_1]], i16* [[GEP_2]], align 2
 ; CHECK-NEXT:    br i1 [[C_5]], label [[LOOP3]], label [[EXIT_LOOPEXIT]], [[LOOP7:!llvm.loop !.*]]
 ; CHECK:       exit.loopexit:
-; CHECK-NEXT:    [[L_17:%.*]] = phi i16* [ [[L_1_LCSSA]], [[MIDDLE_BLOCK]] ], [ [[L_18]], [[LOOP3]] ]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit.loopexit1:
-; CHECK-NEXT:    [[L_1_LCSSA5:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ]
+; CHECK-NEXT:    [[L_1_LCSSA4:%.*]] = phi i16* [ [[L_1]], [[INNER_LATCH]] ]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[L_16:%.*]] = phi i16* [ [[L_1_LCSSA5]], [[EXIT_LOOPEXIT1]] ], [ [[L_17]], [[EXIT_LOOPEXIT]] ]
-; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[L_16]], align 2
+; CHECK-NEXT:    [[L_15:%.*]] = phi i16* [ [[L_1_LCSSA4]], [[EXIT_LOOPEXIT1]] ], [ [[L_1_LCSSA]], [[EXIT_LOOPEXIT]] ]
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[L_15]], align 2
 ; CHECK-NEXT:    ret i16 [[L_3]]
 ;
 entry:
diff --git a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
--- a/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-mem-access.ll
@@ -54,41 +54,3 @@
 for.end:
   ret void
 }
-
-; We used to crash on this function because we removed the fptosi cast when
-; replacing the symbolic stride '%conv'.
-; PR18480
-
-; CHECK-LABEL: fn1
-; CHECK: load <2 x double>
-
-define void @fn1(double* noalias %x, double* noalias %c, double %a) {
-entry:
-  %conv = fptosi double %a to i32
-  %conv2 = add i32 %conv, 4
-  %cmp8 = icmp sgt i32 %conv2, 0
-  br i1 %cmp8, label %for.body.preheader, label %for.end
-
-for.body.preheader:
-  br label %for.body
-
-for.body:
-  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
-  %0 = trunc i64 %indvars.iv to i32
-  %mul = mul nsw i32 %0, %conv
-  %idxprom = sext i32 %mul to i64
-  %arrayidx = getelementptr inbounds double, double* %x, i64 %idxprom
-  %1 = load double, double* %arrayidx, align 8
-  %arrayidx3 = getelementptr inbounds double, double* %c, i64 %indvars.iv
-  store double %1, double* %arrayidx3, align 8
-  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
-  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-  %exitcond = icmp eq i32 %lftr.wideiv, %conv2
-  br i1 %exitcond, label %for.end.loopexit, label %for.body
-
-for.end.loopexit:
-  br label %for.end
-
-for.end:
-  ret void
-}