diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -5280,6 +5280,15 @@
   return cast<StoreInst>(I)->getPointerAddressSpace();
 }
 
+/// A helper function that returns the type of loaded or stored value.
+inline Type *getLoadStoreInstValueType(Value *I) {
+  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
+         "Expected Load or Store instruction");
+  if (auto *LI = dyn_cast<LoadInst>(I))
+    return LI->getType();
+  return cast<StoreInst>(I)->getValueOperand()->getType();
+}
+
 //===----------------------------------------------------------------------===//
 //                              FreezeInst Class
 //===----------------------------------------------------------------------===//
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -255,6 +255,12 @@
   /// If false, good old LV code.
   bool canVectorize(bool UseVPlanNativePath);
 
+  /// \return true if the instruction can be vectorized. If \p
+  /// VectorizeWithPredication is true, then the strategy requires predication
+  /// to vectorize the loop.
+  bool canVectorizeWithPredication(Instruction *I,
+                                   bool VectorizeWithPredication);
+
   /// Return true if we can vectorize this loop while folding its tail by
   /// masking, and mark all respective loads/stores for masking.
   /// This object's state is only modified iff this function returns true.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -833,6 +833,58 @@
   return true;
 }
 
+/// A helper function for checking whether an integer division-related
+/// instruction may divide by zero (in which case it must be predicated if
+/// executed conditionally in the scalar code).
+/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
+/// Non-zero divisors that are non compile-time constants will not be
+/// converted into multiplication, so we will still end up scalarizing
+/// the division, but can do so w/o predication.
+static bool mayDivideByZero(Instruction &I) {
+  assert((I.getOpcode() == Instruction::UDiv ||
+          I.getOpcode() == Instruction::SDiv ||
+          I.getOpcode() == Instruction::URem ||
+          I.getOpcode() == Instruction::SRem) &&
+         "Unexpected instruction");
+  Value *Divisor = I.getOperand(1);
+  auto *CInt = dyn_cast<ConstantInt>(Divisor);
+  return !CInt || CInt->isZero();
+}
+
+bool LoopVectorizationLegality::canVectorizeWithPredication(
+    Instruction *I, bool VectorizeWithPredication) {
+  switch (I->getOpcode()) {
+  default:
+    break;
+  case Instruction::Load:
+  case Instruction::Store: {
+    // Some instructions can be speculated, even when predication is used
+    // for the block.
+    if (!isMaskRequired(I))
+      return true;
+
+    auto *Ptr = getLoadStorePointerOperand(I);
+    auto *Ty = getLoadStoreInstValueType(I);
+    const Align Alignment = getLoadStoreAlignment(I);
+    return isa<LoadInst>(I) ? ((isConsecutivePtr(Ptr) &&
+                                TTI->isLegalMaskedLoad(Ty, Alignment)) ||
+                               TTI->isLegalMaskedGather(Ty, Alignment))
+                            : ((isConsecutivePtr(Ptr) &&
+                                TTI->isLegalMaskedStore(Ty, Alignment)) ||
+                               TTI->isLegalMaskedScatter(Ty, Alignment));
+  }
+  case Instruction::UDiv:
+  case Instruction::SDiv:
+  case Instruction::SRem:
+  case Instruction::URem:
+    // If predication is used for this block and the operation would otherwise
+    // be guarded, then this requires scalarizing.
+    if (blockNeedsPredication(I->getParent()) || VectorizeWithPredication)
+      return !mayDivideByZero(*I);
+  }
+  return true;
+}
+
 bool LoopVectorizationLegality::canVectorizeMemory() {
   LAI = &(*GetLAA)(*TheLoop);
   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -377,11 +377,7 @@
 
 /// A helper function that returns the type of loaded or stored value.
 static Type *getMemInstValueType(Value *I) {
-  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) &&
-         "Expected Load or Store instruction");
-  if (auto *LI = dyn_cast<LoadInst>(I))
-    return LI->getType();
-  return cast<StoreInst>(I)->getValueOperand()->getType();
+  return getLoadStoreInstValueType(I);
 }
 
 /// A helper function that returns true if the given type is irregular. The
@@ -1515,23 +1511,20 @@
   /// Returns true if \p I is an instruction that will be scalarized with
   /// predication. Such instructions include conditional stores and
   /// instructions that may divide by zero.
-  /// If a non-zero VF has been calculated, we check if I will be scalarized
-  /// predication for that VF.
   bool
-  isScalarWithPredication(Instruction *I,
-                          ElementCount VF = ElementCount::getFixed(1)) const;
+  isScalarWithPredication(Instruction *I) const;
 
   // Returns true if \p I is an instruction that will be predicated either
   // through scalar predication or masked load/store or masked gather/scatter.
   // Superset of instructions that return true for isScalarWithPredication.
   bool isPredicatedInst(Instruction *I, ElementCount VF) {
-    if (!blockNeedsPredication(I->getParent()))
+    if (!Legal->blockNeedsPredication(I->getParent()) && !foldTailByMasking())
       return false;
     // Loads and stores that need some form of masked operation are predicated
     // instructions.
     if (isa<LoadInst>(I) || isa<StoreInst>(I))
       return Legal->isMaskRequired(I);
-    return isScalarWithPredication(I, VF);
+    return isScalarWithPredication(I);
   }
 
   /// Returns true if \p I is a memory instruction with consecutive memory
@@ -4917,24 +4910,6 @@
   }
 }
 
-/// A helper function for checking whether an integer division-related
-/// instruction may divide by zero (in which case it must be predicated if
-/// executed conditionally in the scalar code).
-/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
-/// Non-zero divisors that are non compile-time constants will not be
-/// converted into multiplication, so we will still end up scalarizing
-/// the division, but can do so w/o predication.
-static bool mayDivideByZero(Instruction &I) {
-  assert((I.getOpcode() == Instruction::UDiv ||
-          I.getOpcode() == Instruction::SDiv ||
-          I.getOpcode() == Instruction::URem ||
-          I.getOpcode() == Instruction::SRem) &&
-         "Unexpected instruction");
-  Value *Divisor = I.getOperand(1);
-  auto *CInt = dyn_cast<ConstantInt>(Divisor);
-  return !CInt || CInt->isZero();
-}
-
 void InnerLoopVectorizer::widenInstruction(Instruction &I, VPValue *Def,
                                            VPUser &User,
                                            VPTransformState &State) {
@@ -5326,39 +5301,8 @@
 }
 
 bool LoopVectorizationCostModel::isScalarWithPredication(
-    Instruction *I, ElementCount VF) const {
-  if (!blockNeedsPredication(I->getParent()))
-    return false;
-  switch(I->getOpcode()) {
-  default:
-    break;
-  case Instruction::Load:
-  case Instruction::Store: {
-    if (!Legal->isMaskRequired(I))
-      return false;
-    auto *Ptr = getLoadStorePointerOperand(I);
-    auto *Ty = getMemInstValueType(I);
-    // We have already decided how to vectorize this instruction, get that
-    // result.
-    if (VF.isVector()) {
-      InstWidening WideningDecision = getWideningDecision(I, VF);
-      assert(WideningDecision != CM_Unknown &&
-             "Widening decision should be ready at this moment");
-      return WideningDecision == CM_Scalarize;
-    }
-    const Align Alignment = getLoadStoreAlignment(I);
-    return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
-                                isLegalMaskedGather(Ty, Alignment))
-                            : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
-                                isLegalMaskedScatter(Ty, Alignment));
-  }
-  case Instruction::UDiv:
-  case Instruction::SDiv:
-  case Instruction::SRem:
-  case Instruction::URem:
-    return mayDivideByZero(*I);
-  }
-  return false;
+    Instruction *I) const {
+  return !Legal->canVectorizeWithPredication(I, foldTailByMasking());
 }
 
 bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
@@ -5463,7 +5407,7 @@
                         << *I << "\n");
       return;
     }
-    if (isScalarWithPredication(I, VF)) {
+    if (isScalarWithPredication(I)) {
       LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
                         << *I << "\n");
       return;
@@ -6707,7 +6651,7 @@
   // determine if it would be better to not if-convert the blocks they are in.
   // If so, we also record the instructions to scalarize.
   for (BasicBlock *BB : TheLoop->blocks()) {
-    if (!blockNeedsPredication(BB))
+    if (!Legal->blockNeedsPredication(BB) && !foldTailByMasking())
       continue;
     for (Instruction &I : *BB)
       if (isScalarWithPredication(&I)) {
@@ -7944,7 +7888,8 @@
     return None;
 
   // Invalidate interleave groups if all blocks of loop will be predicated.
-  if (CM.blockNeedsPredication(OrigLoop->getHeader()) &&
+  if ((Legal->blockNeedsPredication(OrigLoop->getHeader()) ||
+       CM.foldTailByMasking()) &&
       !useMaskedInterleavedAccesses(*TTI)) {
     LLVM_DEBUG(
         dbgs()
@@ -8544,7 +8489,7 @@
   VPValue *BlockMask = nullptr;
 
   if (OrigLoop->getHeader() == BB) {
-    if (!CM.blockNeedsPredication(BB))
+    if (!Legal->blockNeedsPredication(BB) && !CM.foldTailByMasking())
       return BlockMaskCache[BB] = BlockMask; // Loop incoming mask is all-one.
 
     // Create the block in mask as the first non-phi instruction in the block.
@@ -8717,7 +8662,7 @@
 
   bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
       [this, CI](ElementCount VF) {
-        return CM.isScalarWithPredication(CI, VF);
+        return CM.isScalarWithPredication(CI);
       },
       Range);
 
@@ -8761,7 +8706,7 @@
   auto WillScalarize = [this, I](ElementCount VF) -> bool {
     return CM.isScalarAfterVectorization(I, VF) ||
            CM.isProfitableToScalarize(I, VF) ||
-           CM.isScalarWithPredication(I, VF);
+           CM.isScalarWithPredication(I);
   };
   return !LoopVectorizationPlanner::getDecisionAndClampRange(WillScalarize,
                                                              Range);