diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
--- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -44,10 +44,6 @@
   static unsigned VectorizationInterleave;
   /// True if force-vector-interleave was specified by the user.
   static bool isInterleaveForced();
-
-  /// \When performing memory disambiguation checks at runtime do not
-  /// make more than this number of comparisons.
-  static unsigned RuntimeMemoryCheckThreshold;
 };
 
 /// Checks memory dependences among accesses to the same underlying
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -83,13 +83,6 @@
                             VectorizerParams::VectorizationInterleave));
 unsigned VectorizerParams::VectorizationInterleave;
 
-static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
-    "runtime-memory-check-threshold", cl::Hidden,
-    cl::desc("When performing memory disambiguation checks at runtime do not "
-             "generate more than this number of comparisons (default = 8)."),
-    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
-unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
-
 /// The maximum iterations used to merge memory checks
 static cl::opt<unsigned> MemoryCheckMergeThreshold(
     "memory-check-merge-threshold", cl::Hidden,
diff --git a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
--- a/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -98,6 +98,12 @@
 
 static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
 
+static cl::opt<unsigned> RuntimeMemoryCheckThreshold(
+    "runtime-memory-check-threshold", cl::Hidden,
+    cl::desc("When performing memory disambiguation checks at runtime do not "
+             "generate more than this number of comparisons (default = 8)."),
+    cl::init(8));
+
 /// Threshold minimum allowed percentage for possible
 /// invariant instructions in a loop.
 static cl::opt<float>
@@ -422,8 +428,7 @@
     return false;
   }
   // Number of runtime-checks should be less then RuntimeMemoryCheckThreshold
-  if (LAI->getNumRuntimePointerChecks() >
-      VectorizerParams::RuntimeMemoryCheckThreshold) {
+  if (LAI->getNumRuntimePointerChecks() > RuntimeMemoryCheckThreshold) {
     LLVM_DEBUG(
         dbgs() << "    LAA: Runtime checks are more than threshold !!\n");
     ORE->emit([&]() {
@@ -433,7 +438,7 @@
              << "Number of runtime checks "
              << NV("RuntimeChecks", LAI->getNumRuntimePointerChecks())
              << " exceeds threshold "
-             << NV("Threshold", VectorizerParams::RuntimeMemoryCheckThreshold);
+             << NV("Threshold", RuntimeMemoryCheckThreshold);
     });
     return false;
   }
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -34,6 +34,7 @@
 class PredicatedScalarEvolution;
 class LoopVectorizationRequirements;
 class LoopVectorizeHints;
+class GeneratedRTChecks;
 class OptimizationRemarkEmitter;
 class TargetTransformInfo;
 class TargetLibraryInfo;
@@ -183,12 +184,16 @@
   /// Cost of the loop with that width.
   InstructionCost Cost;
 
-  VectorizationFactor(ElementCount Width, InstructionCost Cost)
-      : Width(Width), Cost(Cost) {}
+  /// Cost of the scalar loop.
+  InstructionCost ScalarCost;
+
+  VectorizationFactor(ElementCount Width, InstructionCost Cost,
+                      InstructionCost ScalarCost)
+      : Width(Width), Cost(Cost), ScalarCost(ScalarCost) {}
 
   /// Width 1 means no vectorization, cost 0 means uncomputed cost.
   static VectorizationFactor Disabled() {
-    return {ElementCount::getFixed(1), 0};
+    return {ElementCount::getFixed(1), 0, 0};
   }
 
   bool operator==(const VectorizationFactor &rhs) const {
@@ -289,7 +294,8 @@
 
   /// Plan how to best vectorize, return the best VF and its cost, or None if
   /// vectorization and interleaving should be avoided up front.
-  Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
+  Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC,
+                                     GeneratedRTChecks &Checks);
 
   /// Use the VPlan-native path to plan how to best vectorize, return the best
   /// VF and its cost.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -198,10 +198,13 @@
              "value are vectorized only if no scalar iteration overheads "
              "are incurred."));
 
-static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
-    "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
-    cl::desc("The maximum allowed number of runtime memory checks with a "
-             "vectorize(enable) pragma."));
+static cl::opt<double> VectorizeMemoryCheckFactor(
+    "vectorize-memory-check-factor", cl::Hidden,
+    cl::desc(
+        "When performing memory disambiguation checks at runtime, the cost of "
+        "the runtime memory checks themselves should not be larger than the "
+        "cost of of N (default 7.0) scalar loop iterations."),
+    cl::init(7.0));
 
 // Option prefer-predicate-over-epilogue indicates that an epilogue is undesired,
 // that predication is preferred, and this lists all options. I.e., the
@@ -423,9 +426,6 @@
   return None;
 }
 
-// Forward declare GeneratedRTChecks.
-class GeneratedRTChecks;
-
 namespace llvm {
 
 /// InnerLoopVectorizer vectorizes loops which contain only one basic
@@ -1635,6 +1635,17 @@
     Scalars.clear();
   }
 
+  /// The vectorization cost is a combination of the cost itself and a boolean
+  /// indicating whether any of the contributing operations will actually
+  /// operate on vector values after type legalization in the backend. If this
+  /// latter value is false, then all operations will be scalarized (i.e. no
+  /// vectorization has actually taken place).
+  using VectorizationCostTy = std::pair<InstructionCost, bool>;
+
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+
 private:
   unsigned NumPredStores = 0;
 
@@ -1663,13 +1674,6 @@
   /// of elements.
   ElementCount getMaxLegalScalableVF(unsigned MaxSafeElements);
 
-  /// The vectorization cost is a combination of the cost itself and a boolean
-  /// indicating whether any of the contributing operations will actually
-  /// operate on vector values after type legalization in the backend. If this
-  /// latter value is false, then all operations will be scalarized (i.e. no
-  /// vectorization has actually taken place).
-  using VectorizationCostTy = std::pair<InstructionCost, bool>;
-
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
@@ -1681,10 +1685,6 @@
   expectedCost(ElementCount VF,
                SmallVectorImpl<InstructionVFPair> *Invalid = nullptr);
 
-  /// Returns the execution time cost of an instruction for a given vector
-  /// width. Vector width of one means scalar.
-  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
   /// The cost-computation logic from getInstructionCost which provides
   /// the vector type as an output parameter.
   InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
@@ -1903,7 +1903,6 @@
   /// Profitable vector factors.
   SmallVector<VectorizationFactor, 8> ProfitableVFs;
 };
-} // end namespace llvm
 
 /// Helper struct to manage generating runtime checks for vectorization.
 ///
@@ -2008,6 +2007,25 @@
     }
   }
 
+  unsigned getCost(LoopVectorizationCostModel &CM) {
+    unsigned RTCheckCost = 0;
+    if (SCEVCheckBlock)
+      for (Instruction &I : *SCEVCheckBlock) {
+        if (SCEVCheckBlock->getTerminator() == &I)
+          continue;
+        RTCheckCost += *CM.getInstructionCost(&I, ElementCount::getFixed(1))
+                            .first.getValue();
+      }
+    if (MemCheckBlock)
+      for (Instruction &I : *MemCheckBlock) {
+        if (MemCheckBlock->getTerminator() == &I)
+          continue;
+        RTCheckCost += *CM.getInstructionCost(&I, ElementCount::getFixed(1))
+                            .first.getValue();
+      }
+    return RTCheckCost;
+  }
+
   /// Remove the created SCEV & memory runtime check blocks & instructions, if
   /// unused.
   ~GeneratedRTChecks() {
@@ -2106,6 +2124,7 @@
     return MemCheckBlock;
   }
 };
+} // end namespace llvm
 
 // Return true if \p OuterLp is an outer loop annotated with hints for explicit
 // vectorization. The loop needs to be annotated with #pragma omp simd
@@ -3305,7 +3324,6 @@
 }
 
 BasicBlock *InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) {
-
   BasicBlock *const SCEVCheckBlock =
       RTChecks.emitSCEVChecks(L, Bypass, LoopVectorPreHeader, LoopExitBlock);
   if (!SCEVCheckBlock)
@@ -6042,7 +6060,8 @@
   assert(VFCandidates.count(ElementCount::getFixed(1)) &&
          "Expected Scalar VF to be a candidate");
 
-  const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost);
+  const VectorizationFactor ScalarCost(ElementCount::getFixed(1), ExpectedCost,
+                                       ExpectedCost);
   VectorizationFactor ChosenFactor = ScalarCost;
 
   bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
@@ -6060,7 +6079,7 @@
       continue;
 
     VectorizationCostTy C = expectedCost(i, &InvalidCosts);
-    VectorizationFactor Candidate(i, C.first);
+    VectorizationFactor Candidate(i, C.first, ScalarCost.ScalarCost);
     LLVM_DEBUG(
         dbgs() << "LV: Vector loop of width " << i << " costs: "
                << (Candidate.Cost / Candidate.Width.getKnownMinValue())
@@ -6251,7 +6270,7 @@
     LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization factor is forced.\n";);
     if (LVP.hasPlanWithVFs(
             {MainLoopVF, ElementCount::getFixed(EpilogueVectorizationForceVF)}))
-      return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0};
+      return {ElementCount::getFixed(EpilogueVectorizationForceVF), 0, 0};
     else {
       LLVM_DEBUG(
           dbgs()
@@ -8080,7 +8099,7 @@
     if (VPlanBuildStressTest)
       return VectorizationFactor::Disabled();
 
-    return {VF, 0 /*Cost*/};
+    return {VF, 0 /*Cost*/, 0};
   }
 
   LLVM_DEBUG(
@@ -8090,7 +8109,8 @@
 }
 
 Optional<VectorizationFactor>
-LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC) {
+LoopVectorizationPlanner::plan(ElementCount UserVF, unsigned UserIC,
+                               GeneratedRTChecks &Checks) {
   assert(OrigLoop->isInnermost() && "Inner loop expected.");
   FixedScalableVFPair MaxFactors = CM.computeMaxVF(UserVF, UserIC);
   if (!MaxFactors) // Cases that should not to be vectorized nor interleaved.
@@ -8123,7 +8143,8 @@
       CM.collectInLoopReductions();
       buildVPlansWithVPRecipes(UserVF, UserVF);
       LLVM_DEBUG(printPlans(dbgs()));
-      return {{UserVF, 0}};
+      Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate());
+    return {{UserVF, 0, 0}};
     } else
       reportVectorizationInfo("UserVF ignored because of invalid costs.",
                               "InvalidCost", ORE, OrigLoop);
@@ -8159,15 +8180,14 @@
   // Select the optimal vectorization factor.
   auto SelectedVF = CM.selectVectorizationFactor(VFCandidates);
 
+  if (!SelectedVF.Width.isScalar())
+    Checks.Create(OrigLoop, *Legal->getLAI(), PSE.getUnionPredicate());
+
   // Check if it is profitable to vectorize with runtime checks.
-  unsigned NumRuntimePointerChecks = Requirements.getNumRuntimePointerChecks();
-  if (SelectedVF.Width.getKnownMinValue() > 1 && NumRuntimePointerChecks) {
-    bool PragmaThresholdReached =
-        NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
-    bool ThresholdReached =
-        NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
-    if ((ThresholdReached && !Hints.allowReordering()) ||
-        PragmaThresholdReached) {
+  if (SelectedVF.Width.getKnownMinValue() > 1 &&
+      Requirements.getNumRuntimePointerChecks()) {
+    if (Checks.getCost(CM) >
+        VectorizeMemoryCheckFactor * (*SelectedVF.ScalarCost.getValue())) {
       ORE->emit([&]() {
         return OptimizationRemarkAnalysisAliasing(
                    DEBUG_TYPE, "CantReorderMemOps", OrigLoop->getStartLoc(),
@@ -8177,7 +8197,7 @@
       });
       LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
       Hints.emitRemarkWithHints();
-      return VectorizationFactor::Disabled();
+      return None;
     }
   }
   return SelectedVF;
@@ -10279,8 +10299,10 @@
   ElementCount UserVF = Hints.getWidth();
   unsigned UserIC = Hints.getInterleave();
 
+  GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
+                           F->getParent()->getDataLayout());
   // Plan how to best vectorize, return the best VF and its cost.
-  Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC);
+  Optional<VectorizationFactor> MaybeVF = LVP.plan(UserVF, UserIC, Checks);
 
   VectorizationFactor VF = VectorizationFactor::Disabled();
   unsigned IC = 1;
@@ -10376,13 +10398,6 @@
   bool DisableRuntimeUnroll = false;
   MDNode *OrigLoopID = L->getLoopID();
   {
-    // Optimistically generate runtime checks. Drop them if they turn out to not
-    // be profitable. Limit the scope of Checks, so the cleanup happens
-    // immediately after vector codegeneration is done.
-    GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
-                             F->getParent()->getDataLayout());
-    if (!VF.Width.isScalar() || IC > 1)
-      Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
     LVP.setBestPlan(VF.Width, IC);
 
     using namespace ore;
diff --git a/llvm/test/LTO/X86/diagnostic-handler-remarks.ll b/llvm/test/LTO/X86/diagnostic-handler-remarks.ll
--- a/llvm/test/LTO/X86/diagnostic-handler-remarks.ll
+++ b/llvm/test/LTO/X86/diagnostic-handler-remarks.ll
@@ -5,6 +5,7 @@
 
 ; Confirm that there are -pass-remarks.
 ; RUN: llvm-lto -use-new-pm=false \
+; RUN:          -vectorize-memory-check-factor=0 \
 ; RUN:          -pass-remarks=inline \
 ; RUN:          -exported-symbol _func2 -pass-remarks-analysis=loop-vectorize \
 ; RUN:          -exported-symbol _main -o %t.o %t.bc 2>&1 | \
@@ -12,6 +13,7 @@
 ; RUN: llvm-nm %t.o | FileCheck %s -check-prefix NM
 
 ; RUN: llvm-lto -use-new-pm=false \
+; RUN:          -vectorize-memory-check-factor=0 \
 ; RUN:          -pass-remarks=inline -use-diagnostic-handler \
 ; RUN:          -exported-symbol _func2 -pass-remarks-analysis=loop-vectorize \
 ; RUN:          -exported-symbol _main -o %t.o %t.bc 2>&1 | \
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
@@ -1,14 +1,15 @@
-; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck %s
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck --check-prefix=CHECK --check-prefix=DEFAULT %s
+; RUN: opt -loop-vectorize -vectorize-memory-check-factor=1 -mtriple=arm64-apple-iphoneos -S %s | FileCheck --check-prefix=CHECK --check-prefix=CUSTOM %s
 
-; Tests for loops with large numbers of runtime checks. Check that loops are
-; vectorized, if the loop trip counts are large and the impact of the runtime
-; checks is very small compared to the expected loop runtimes.
+; All of the loops here have sufficiently-large loop bodies,
+; so the additional cost of the runtime memory checks is not too large,
+; so we vectorize them.
 
-
-; The trip count in the loop in this function is too to warrant large runtime checks.
 ; CHECK-LABEL: define {{.*}} @test_tc_too_small
-; CHECK-NOT: vector.memcheck
-; CHECK-NOT: vector.body
+; DEFAULT: vector.memcheck
+; DEFAULT: vector.body
+; CUSTOM-NOT:      vector.memcheck
+; CUSTOM-NOT:      vector.body
 define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop
@@ -57,11 +58,11 @@
   ret void
 }
 
-; FIXME
-; The trip count in the loop in this function high enough to warrant large runtime checks.
 ; CHECK-LABEL: define {{.*}} @test_tc_big_enough
-; CHECK-NOT: vector.memcheck
-; CHECK-NOT: vector.body
+; DEFAULT: vector.memcheck
+; DEFAULT: vector.body
+; CUSTOM-NOT:      vector.memcheck
+; CUSTOM-NOT:      vector.body
 define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop
diff --git a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
--- a/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/runtime-limit.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -loop-vectorize -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
-; RUN: opt < %s -loop-vectorize -pragma-vectorize-memory-check-threshold=6 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s
+; RUN: opt < %s -loop-vectorize -vectorize-memory-check-factor=2 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -vectorize-memory-check-factor=1 -dce -instcombine -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -S 2>&1 | FileCheck %s -check-prefix=OVERRIDE
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 
@@ -8,10 +8,10 @@
 ; First loop produced diagnostic pass remark.
 ;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 2)
 ; Second loop produces diagnostic analysis remark.
-;CHECK: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
+;CHECK: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 1)
 
 ; First loop produced diagnostic pass remark.
-;OVERRIDE: remark: {{.*}}:0:0: vectorized loop (vectorization width: 4, interleaved count: 2)
+;OVERRIDE: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
 ; Second loop produces diagnostic pass remark.
 ;OVERRIDE: remark: {{.*}}:0:0: loop not vectorized: cannot prove it is safe to reorder memory operations
 
@@ -20,7 +20,7 @@
 ;CHECK: <4 x i32>
 ;CHECK: ret
 ;OVERRIDE-LABEL: func1x6(
-;OVERRIDE: <4 x i32>
+;OVERRIDE-NOT: <4 x i32>
 ;OVERRIDE: ret
 define i32 @func1x6(i32* nocapture %out, i32* nocapture %A, i32* nocapture %B, i32* nocapture %C, i32* nocapture %D, i32* nocapture %E, i32* nocapture %F) {
 entry:
@@ -54,7 +54,7 @@
 
 ; We are not vectorizing with 12 runtime checks.
 ;CHECK-LABEL: func2x6(
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret
 ; We vectorize with 12 checks if a vectorization hint is provided.
 ;OVERRIDE-LABEL: func2x6(