diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -138,6 +138,15 @@
                                     const Function *Callee,
                                     SmallPtrSetImpl<Argument *> &Args) const;
   bool hasActiveVectorLength(Type *DataType, Align Alignment) const;
+  InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+                                    unsigned AddressSpace,
+                                    TTI::TargetCostKind CostKind,
+                                    const Instruction *I = nullptr);
+
+private:
+  // Estimated cost (in cycles) of a full pipeline flush on Power9.
+  static const InstructionCost::CostType P9PipelineFlushEstimate = 80;
+
   /// @}
 };
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -1339,6 +1339,100 @@
 }
 
 bool PPCTTIImpl::hasActiveVectorLength(Type *DataType, Align Alignment) const {
-  // TODO
-  return false;
+  // Loads/stores with length instructions use bits 0-7 of the GPR operand and
+  // therefore cannot be used in 32-bit mode.
+  if ((!ST->hasP9Vector() && !ST->hasP10Vector()) || !ST->isPPC64())
+    return false;
+  if (auto *VecTy = dyn_cast<FixedVectorType>(DataType)) {
+    unsigned VecWidth = VecTy->getPrimitiveSizeInBits();
+    return VecWidth == 128;
+  }
+  Type *ScalarTy = DataType->getScalarType();
+
+  if (ScalarTy->isPointerTy())
+    return true;
+
+  if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy())
+    return true;
+
+  if (!ScalarTy->isIntegerTy())
+    return false;
+
+  unsigned IntWidth = ScalarTy->getIntegerBitWidth();
+  return IntWidth == 8 || IntWidth == 16 || IntWidth == 32 || IntWidth == 64;
+}
+
+InstructionCost PPCTTIImpl::getVPMemoryOpCost(unsigned Opcode, Type *Src,
+                                              Align Alignment,
+                                              unsigned AddressSpace,
+                                              TTI::TargetCostKind CostKind,
+                                              const Instruction *I) {
+  InstructionCost Cost = BaseT::getVPMemoryOpCost(Opcode, Src, Alignment,
+                                                  AddressSpace, CostKind, I);
+  if (TLI->getValueType(DL, Src, true) == MVT::Other)
+    return Cost;
+  // TODO: Handle other cost kinds.
+  if (CostKind != TTI::TCK_RecipThroughput)
+    return Cost;
+
+  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
+         "Invalid Opcode");
+  bool IsLoad = (Opcode == Instruction::Load);
+
+  auto *SrcVTy = dyn_cast<FixedVectorType>(Src);
+  assert(SrcVTy && "Expected a vector type for VP memory operations");
+
+  if (hasActiveVectorLength(Src, Alignment)) {
+    std::pair<InstructionCost, MVT> LT =
+        TLI->getTypeLegalizationCost(DL, SrcVTy);
+    InstructionCost Cost = vectorCostAdjustment(LT.first, Opcode, Src, nullptr);
+
+    // On P9, but not on P10, a misaligned op causes a pipeline flush;
+    // otherwise the VSX masked memops cost the same as unmasked ones.
+    if (Alignment >= 16 || ST->getCPUDirective() != PPC::DIR_PWR9)
+      return Cost;
+
+    // Assume the average case: ops aligned to fewer than 16 bytes (128 bits)
+    // flush the full pipeline about half the time. The cost when this
+    // happens is about 80 cycles.
+    return P9PipelineFlushEstimate / 2;
+  }
+
+  // Usually we should not get to this point, but the following is an attempt
+  // to model the cost of legalization. Currently we can only lower intrinsics
+  // with an evl but no mask, on Power 9/10. Otherwise, we must scalarize: we
+  // need to extract (from the mask) the most/least significant byte of each
+  // halfword aligned with a vector element, and do an access predicated on
+  // its 0th bit.
+  // We make the simplifying assumption that byte-extraction costs are
+  // stride-invariant, so we model the extraction as scalarizing a load of
+  // <NumElems x i8>.
+
+  // VSX masks have a lane per bit, but the predication here is per halfword.
+  unsigned NumElems = SrcVTy->getNumElements();
+  auto *MaskI8Ty = Type::getInt8Ty(SrcVTy->getContext());
+  InstructionCost MaskSplitCost = getScalarizationOverhead(
+      FixedVectorType::get(MaskI8Ty, NumElems), false, true);
+  const InstructionCost ScalarCompareInstrCost =
+      getCmpSelInstrCost(Instruction::ICmp, MaskI8Ty, nullptr,
+                         CmpInst::BAD_ICMP_PREDICATE, CostKind);
+  assert(ScalarCompareInstrCost.isValid() &&
+         "Expected valid instruction cost");
+  int ScalarCompareCost = *ScalarCompareInstrCost.getValue();
+
+  const InstructionCost BranchInstrCost =
+      getCFInstrCost(Instruction::Br, CostKind);
+  assert(BranchInstrCost.isValid() && "Expected valid instruction cost");
+  int BranchCost = *BranchInstrCost.getValue();
+  int MaskCmpCost = NumElems * (BranchCost + ScalarCompareCost);
+
+  InstructionCost ValueSplitCost =
+      getScalarizationOverhead(SrcVTy, IsLoad, !IsLoad);
+  const InstructionCost ScalarMemOpInstrCost =
+      NumElems * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
+                                        Alignment, AddressSpace, CostKind);
+  assert(ScalarMemOpInstrCost.isValid() && "Expected valid instruction cost");
+  int ScalarMemOpCost = *ScalarMemOpInstrCost.getValue();
+  return ScalarMemOpCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
 }
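For reference, a minimal standalone sketch (not part of the patch) of the scalarization-fallback arithmetic above, using made-up placeholder per-unit costs; in the patch the actual values come from the TTI hooks (getScalarizationOverhead, getCmpSelInstrCost, getCFInstrCost, getMemoryOpCost).

// Hypothetical illustration of the fallback formula:
//   total = ScalarMemOpCost + ValueSplitCost + MaskSplitCost
//           + NumElems * (BranchCost + ScalarCompareCost)
// All numbers below are placeholders, not measured PowerPC costs.
#include <cstdio>

int main() {
  const int NumElems = 4;          // e.g. a <4 x i32> VP load
  const int MaskSplitCost = 4;     // extract one i8 per element from the mask
  const int ValueSplitCost = 4;    // insert/extract the data elements
  const int ScalarMemOpCost = 4;   // one scalar memory op per element
  const int ScalarCompareCost = 1; // icmp on each extracted mask byte
  const int BranchCost = 1;        // conditional branch per element

  const int MaskCmpCost = NumElems * (BranchCost + ScalarCompareCost);
  const int Total =
      ScalarMemOpCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
  std::printf("estimated scalarized VP memop cost: %d\n", Total);
  return 0;
}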