diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -187,7 +187,8 @@
 
   void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
 
-  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints);
+  bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints,
+                   bool IgnoreRTThreshold);
 
 private:
   unsigned NumRuntimePointerChecks = 0;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -246,8 +246,9 @@
   }
 }
 
-bool LoopVectorizationRequirements::doesNotMeet(
-    Function *F, Loop *L, const LoopVectorizeHints &Hints) {
+bool LoopVectorizationRequirements::doesNotMeet(Function *F, Loop *L,
+                                                const LoopVectorizeHints &Hints,
+                                                bool IgnoreRTThreshold) {
   const char *PassName = Hints.vectorizeAnalysisPassName();
   bool Failed = false;
   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
@@ -266,8 +267,12 @@
       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
   bool ThresholdReached =
       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
-  if ((ThresholdReached && !Hints.allowReordering()) ||
-      PragmaThresholdReached) {
+  bool DoubleThresholdReached =
+      NumRuntimePointerChecks >
+      2 * VectorizerParams::RuntimeMemoryCheckThreshold;
+  if ((!IgnoreRTThreshold && ((ThresholdReached && !Hints.allowReordering()) ||
+                              PragmaThresholdReached)) ||
+      (DoubleThresholdReached && !Hints.allowReordering())) {
     ORE.emit([&]() {
       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
                                                 L->getStartLoc(),
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -330,6 +330,11 @@
     cl::desc(
         "Prefer predicating a reduction operation over an after loop select."));
 
+static cl::opt<float> RuntimeCheckOverheadFraction(
+    "lv-runtime-check-overhead-fraction", cl::init(0.005), cl::Hidden,
+    cl::desc("The maximum fraction of the allowed overhead runtime checks can "
+             "add compared to the runtime of the loop."));
+
 cl::opt<bool> EnableVPlanNativePath(
     "enable-vplan-native-path", cl::init(false), cl::Hidden,
     cl::desc("Enable VPlan-native vectorization path with "
@@ -1602,9 +1607,6 @@
     Scalars.clear();
   }
 
-private:
-  unsigned NumPredStores = 0;
-
   /// \return An upper bound for the vectorization factor, a power-of-2 larger
   /// than zero. One is returned if vectorization should best be avoided due
   /// to cost.
@@ -1620,16 +1622,21 @@
   /// actually taken place).
   using VectorizationCostTy = std::pair<InstructionCost, bool>;
 
+  /// Returns the execution time cost of an instruction for a given vector
+  /// width. Vector width of one means scalar.
+  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
+  /// Expected cost of the scalar loop; stays 0 until a cost is assigned.
+  float ScalarCost = 0.0f;
+
+private:
+  unsigned NumPredStores = 0;
+
   /// Returns the expected execution cost. The unit of the cost does
   /// not matter because we use the 'cost' units to compare different
   /// vector widths. The cost that is returned is *not* normalized by
   /// the factor width.
   VectorizationCostTy expectedCost(ElementCount VF);
 
-  /// Returns the execution time cost of an instruction for a given vector
-  /// width. Vector width of one means scalar.
-  VectorizationCostTy getInstructionCost(Instruction *I, ElementCount VF);
-
   /// The cost-computation logic from getInstructionCost which provides
   /// the vector type as an output parameter.
   InstructionCost getInstructionCost(Instruction *I, ElementCount VF,
@@ -1949,6 +1956,25 @@
     }
   }
 
+  /// Sum of the scalar (VF = 1) instruction costs, as reported by \p CM, over
+  /// the SCEV and memory check blocks; each block's terminator is excluded.
+  /// A null block pointer contributes nothing.
+  unsigned getCost(LoopVectorizationCostModel &CM) {
+    unsigned RTCheckCost = 0;
+    for (BasicBlock *CheckBlock : {SCEVCheckBlock, MemCheckBlock}) {
+      if (!CheckBlock)
+        continue;
+      for (Instruction &I : *CheckBlock) {
+        if (CheckBlock->getTerminator() == &I)
+          continue;
+        RTCheckCost += *CM.getInstructionCost(&I, ElementCount::getFixed(1))
+                            .first.getValue();
+      }
+    }
+    return RTCheckCost;
+  }
+
   /// Remove the created SCEV & memory runtime check blocks & instructions, if
   /// unused.
   ~GeneratedRTChecks() {
@@ -5852,7 +5878,7 @@
   assert(ExpectedCost.isValid() && "Unexpected invalid cost for scalar loop");
 
   auto Width = ElementCount::getFixed(1);
-  const float ScalarCost = *ExpectedCost.getValue();
+  ScalarCost = *ExpectedCost.getValue();
   float Cost = ScalarCost;
 
   bool ForceVectorization = Hints->getForce() == LoopVectorizeHints::FK_Enabled;
@@ -9618,13 +9644,6 @@
   // Identify the diagnostic messages that should be produced.
   std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
   bool VectorizeLoop = true, InterleaveLoop = true;
-  if (Requirements.doesNotMeet(F, L, Hints)) {
-    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
-                         "requirements.\n");
-    Hints.emitRemarkWithHints();
-    return false;
-  }
-
   if (VF.Width.isScalar()) {
     LLVM_DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
     VecDiagMsg = std::make_pair(
@@ -9712,8 +9731,31 @@
     // immediately after vector codegeneration is done.
     GeneratedRTChecks Checks(*PSE.getSE(), DT, LI,
                              F->getParent()->getDataLayout());
-    if (!VF.Width.isScalar() || IC > 1)
+    bool CanIgnoreRTThreshold = true;
+    if (!VF.Width.isScalar() || IC > 1) {
+      CanIgnoreRTThreshold = false;
+
       Checks.Create(L, *LVL.getLAI(), PSE.getUnionPredicate());
+      if (ExpectedTC) {
+        unsigned RTCost = Checks.getCost(CM);
+        // If the expected cost of the runtime checks is a small fraction of the
+        // expected cost of the scalar loop, we can be more aggressive with
+        // using runtime checks.
+        CanIgnoreRTThreshold = RTCost < (*ExpectedTC * CM.ScalarCost *
+                                         RuntimeCheckOverheadFraction);
+        LLVM_DEBUG(dbgs() << "LV: Cost of runtime check: " << RTCost << " "
+                          << *ExpectedTC * CM.ScalarCost << "\n");
+      }
+    }
+
+    if (Requirements.doesNotMeet(F, L, Hints, CanIgnoreRTThreshold)) {
+      LLVM_DEBUG(
+          dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
+                    "requirements.\n");
+      Hints.emitRemarkWithHints();
+      return false;
+    }
+
     LVP.setBestPlan(VF.Width, IC);
 
     using namespace ore;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/runtime-check-size-based-threshold.ll
@@ -1,14 +1,19 @@
-; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck %s
+; RUN: opt -loop-vectorize -mtriple=arm64-apple-iphoneos -S %s | FileCheck --check-prefix=CHECK --check-prefix=DEFAULT %s
+; RUN: opt -loop-vectorize -lv-runtime-check-overhead-fraction=0.5 -mtriple=arm64-apple-iphoneos -S %s | FileCheck --check-prefix=CHECK --check-prefix=CUSTOM %s
 
 ; Tests for loops with large numbers of runtime checks. Check that loops are
 ; vectorized, if the loop trip counts are large and the impact of the runtime
 ; checks is very small compared to the expected loop runtimes.
 
 
-; The trip count in the loop in this function is too to warrant large runtime checks.
+; The trip count in the loop in this function is too small to warrant large
+; runtime checks with the default threshold. It should be vectorized with a
+; larger custom threshold.
 ; CHECK-LABEL: define {{.*}} @test_tc_too_small
-; CHECK-NOT: vector.memcheck
-; CHECK-NOT: vector.body
+; DEFAULT-NOT: vector.memcheck
+; DEFAULT-NOT: vector.body
+; CUSTOM:      vector.memcheck
+; CUSTOM:      vector.body
 define void @test_tc_too_small(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop
@@ -57,11 +62,10 @@
   ret void
 }
 
-; FIXME
 ; The trip count in the loop in this function high enough to warrant large runtime checks.
 ; CHECK-LABEL: define {{.*}} @test_tc_big_enough
-; CHECK-NOT: vector.memcheck
-; CHECK-NOT: vector.body
+; CHECK: vector.memcheck
+; CHECK: vector.body
 define void @test_tc_big_enough(i16* %ptr.1, i16* %ptr.2, i16* %ptr.3, i16* %ptr.4, i64 %off.1, i64 %off.2) {
 entry:
   br label %loop