Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -660,6 +660,10 @@
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;
 
+  /// Return true if the target supports vectorization of the intrinsic \p IID
+  /// with scalable vectors, i.e. the operation never needs to be scalarized.
+  bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const;
+
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
@@ -1516,6 +1520,7 @@
   virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
   virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
   virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
+  virtual bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) = 0;
   virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
   virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
   virtual bool prefersVectorizedAddressing() = 0;
@@ -1897,6 +1902,10 @@
   bool isLegalMaskedExpandLoad(Type *DataType) override {
     return Impl.isLegalMaskedExpandLoad(DataType);
   }
+  bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) override {
+    return Impl.isLegalIntrinsicForScalableVectors(IID);
+  }
+
   bool hasDivRemOp(Type *DataType, bool IsSigned) override {
     return Impl.hasDivRemOp(DataType, IsSigned);
   }
Index: llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -258,6 +258,10 @@
     return false;
   }
 
+  bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const {
+    return false;
+  }
+
   bool isLegalMaskedCompressStore(Type *DataType) const { return false; }
 
   bool isLegalMaskedExpandLoad(Type *DataType) const { return false; }
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -409,6 +409,11 @@
   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }
 
+bool TargetTransformInfo::isLegalIntrinsicForScalableVectors(
+    Intrinsic::ID IID) const {
+  return TTIImpl->isLegalIntrinsicForScalableVectors(IID);
+}
+
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -256,6 +256,8 @@
     return isLegalMaskedGatherScatter(DataType);
   }
 
+  bool isLegalIntrinsicForScalableVectors(Intrinsic::ID IID) const;
+
   bool isLegalNTStore(Type *DataType, Align Alignment) {
     // NOTE: The logic below is mostly geared towards LV, which calls it with
     //       vectors with 2 elements. We might want to improve that, if other
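Note: every other target inherits the conservative TargetTransformInfoImpl
default above and keeps returning false. As a rough sketch (hypothetical
target and class name, not part of this patch), a different scalable-vector
target would opt in along these lines:

    // Hypothetical override for a target "MyTarget": only the intrinsics
    // assumed to have native scalable lowerings are accepted; everything
    // else defers to the conservative base implementation, which rejects
    // all intrinsics for scalable vectors.
    bool MyTargetTTIImpl::isLegalIntrinsicForScalableVectors(
        Intrinsic::ID IID) const {
      switch (IID) {
      case Intrinsic::sqrt:
      case Intrinsic::fma:
        return true;
      default:
        return BaseT::isLegalIntrinsicForScalableVectors(IID);
      }
    }
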
Index: llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1905,3 +1905,49 @@
 
   return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
 }
+
+bool AArch64TTIImpl::isLegalIntrinsicForScalableVectors(
+    Intrinsic::ID IID) const {
+  switch (IID) {
+  case Intrinsic::abs:
+  case Intrinsic::bswap:
+  case Intrinsic::bitreverse:
+  case Intrinsic::ctpop:
+  case Intrinsic::ctlz:
+  case Intrinsic::cttz:
+  case Intrinsic::fshl:
+  case Intrinsic::fshr:
+  case Intrinsic::smax:
+  case Intrinsic::smin:
+  case Intrinsic::umax:
+  case Intrinsic::umin:
+  case Intrinsic::sadd_sat:
+  case Intrinsic::ssub_sat:
+  case Intrinsic::uadd_sat:
+  case Intrinsic::usub_sat:
+  case Intrinsic::smul_fix:
+  case Intrinsic::smul_fix_sat:
+  case Intrinsic::umul_fix:
+  case Intrinsic::umul_fix_sat:
+  case Intrinsic::sqrt:
+  case Intrinsic::fabs:
+  case Intrinsic::minnum:
+  case Intrinsic::maxnum:
+  case Intrinsic::minimum:
+  case Intrinsic::maximum:
+  case Intrinsic::floor:
+  case Intrinsic::ceil:
+  case Intrinsic::trunc:
+  case Intrinsic::rint:
+  case Intrinsic::nearbyint:
+  case Intrinsic::round:
+  case Intrinsic::roundeven:
+  case Intrinsic::fma:
+  case Intrinsic::fmuladd:
+    return true;
+  default:
+    // We can fall back on scalarization for fixed width vectors, but not for
+    // scalable vectors.
+    return BaseT::isLegalIntrinsicForScalableVectors(IID);
+  }
+}
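Each intrinsic in the switch above has a native SVE lowering for legal
scalable vector types, so the vectorizer can widen the call directly instead
of scalarizing it. For illustration (an IR sketch, not taken from the patch),
a widened smax call would look like:

    %r = call <vscale x 4 x i32> @llvm.smax.nxv4i32(<vscale x 4 x i32> %a,
                                                    <vscale x 4 x i32> %b)

An intrinsic outside the list would need to be expanded element by element
when no vector mapping exists, which is impossible when the element count is
only known at runtime as a multiple of vscale; hence the conservative
fall-through to the base implementation.
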
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1513,13 +1513,18 @@
   /// Returns true if the target machine supports all of the reduction
   /// variables found for the given VF.
-  bool canVectorizeReductions(ElementCount VF) {
+  bool canVectorizeReductions(ElementCount VF) const {
     return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
      const RecurrenceDescriptor &RdxDesc = Reduction.second;
      return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
    }));
  }
 
+  /// Returns true if we can widen all instructions in the loop using a
+  /// maximum scalable vectorization factor \p MaxVF. If the loop cannot be
+  /// widened the function emits an appropriate remark explaining why.
+  bool canWidenLoopWithScalableVectors(ElementCount MaxVF) const;
+
   /// Returns true if \p I is an instruction that will be scalarized with
   /// predication. Such instructions include conditional stores and
   /// instructions that may divide by zero.
@@ -5661,6 +5666,60 @@
   return false;
 }
 
+bool LoopVectorizationCostModel::canWidenLoopWithScalableVectors(
+    ElementCount MaxVF) const {
+  // Test that the loop-vectorizer can legalize all operations for eligible
+  // vectorization factors up to MaxVF.
+
+  // Disable scalable vectorization if the loop contains unsupported reductions.
+  if (!canVectorizeReductions(MaxVF)) {
+    reportVectorizationInfo("Scalable vectorization not supported for the "
+                            "reduction operations found in this loop.",
+                            "ScalableVFUnfeasible", ORE, TheLoop);
+    return false;
+  }
+
+  // Iterate through all instructions in the loop, ensuring that each is legal
+  // to vectorize with a scalable VF.
+  for (BasicBlock *BB : TheLoop->blocks()) {
+    for (Instruction &I : *BB) {
+      if (I.isDebugOrPseudoInst())
+        continue;
+
+      if (auto *CI = dyn_cast<CallInst>(&I)) {
+        Intrinsic::ID VecID = getVectorIntrinsicIDForCall(CI, TLI);
+
+        // First check if it's always legal to widen this intrinsic regardless
+        // of the scalable VF, i.e. we don't have to worry about scalarizing
+        // the intrinsic.
+        if (VecID && TTI.isLegalIntrinsicForScalableVectors(VecID))
+          continue;
+
+        // At this point we have no guarantee that we can widen this call
+        // unless we have mappings in the vector function database.
+        bool HasScalableMapping =
+            llvm::any_of(VFDatabase::getMappings(*CI),
+                         [&](const VFInfo &V) { return V.Shape.IsScalable; });
+
+        // FIXME: This is still optimistic because we assume that since we have
+        // at least one supported VF we can vectorize the loop. However, in
+        // future we should really filter out all unsupported VFs when deciding
+        // which one to choose.
+        if (!HasScalableMapping) {
+          StringRef FnName = CI->getCalledFunction()->getName();
+          reportVectorizationInfo("Scalable vectorization not supported for "
+                                  "the call instruction (" +
+                                      FnName.str() + ") found in this loop",
+                                  "ScalableVFUnfeasible", ORE, TheLoop);
+          return false;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
 ElementCount
 LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
   if (!TTI.supportsScalableVectors() && !ForceTargetSupportsScalableVectors) {
@@ -5680,19 +5739,8 @@
   auto MaxScalableVF = ElementCount::getScalable(
       std::numeric_limits<ElementCount::ScalarTy>::max());
 
-  // Disable scalable vectorization if the loop contains unsupported reductions.
-  // Test that the loop-vectorizer can legalize all operations for this MaxVF.
-  // FIXME: While for scalable vectors this is currently sufficient, this should
-  // be replaced by a more detailed mechanism that filters out specific VFs,
-  // instead of invalidating vectorization for a whole set of VFs based on the
-  // MaxVF.
-  if (!canVectorizeReductions(MaxScalableVF)) {
-    reportVectorizationInfo(
-        "Scalable vectorization not supported for the reduction "
-        "operations found in this loop.",
-        "ScalableVFUnfeasible", ORE, TheLoop);
+  if (!canWidenLoopWithScalableVectors(MaxScalableVF))
     return ElementCount::getScalable(0);
-  }
 
   if (Legal->isSafeForAnyVectorWidth())
     return MaxScalableVF;
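For non-intrinsic calls (and intrinsics the target cannot widen), the
VFDatabase lookup above only finds scalable variants that are advertised on
the call site through the vector-function-abi-variant attribute. A minimal IR
sketch of such a mapping, mirroring the mappings used in the test below:

    declare double @llvm.sin.f64(double)
    declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)

    ; at the call site inside the loop:
    %s = call fast double @llvm.sin.f64(double %x) #2

    attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec_nxv2f64)" }

The x in the VLEN position of the mangled name (_ZGV_LLVM_Nxv_...) is what
marks the variant as scalable, and is what the VFInfo::Shape.IsScalable check
inspects.
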
Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-call.ll
@@ -1,4 +1,6 @@
-; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on < %s | FileCheck %s
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -instcombine -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=on \
+; RUN:     -pass-remarks-missed=loop-vectorize < %s 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s --check-prefix=CHECK-REMARKS
 
 define void @vec_load(i64 %N, double* nocapture %a, double* nocapture readonly %b) {
 ; CHECK-LABEL: @vec_load
@@ -75,7 +77,7 @@
 ; CHECK-LABEL: @vec_intrinsic
 ; CHECK: vector.body:
 ; CHECK: %[[LOAD:.*]] = load <vscale x 2 x double>, <vscale x 2 x double>*
-; CHECK: call fast <vscale x 2 x double> @sin_vec(<vscale x 2 x double> %[[LOAD]])
+; CHECK: call fast <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double> %[[LOAD]])
 entry:
   %cmp7 = icmp sgt i64 %N, 0
   br i1 %cmp7, label %for.body, label %for.end
@@ -95,17 +97,92 @@
   ret void
 }
 
+; CHECK-REMARKS: Scalable vectorization not supported for the call instruction (llvm.sin.f32) found in this loop
+define void @vec_sin_no_mapping(float* noalias nocapture %dst, float* noalias nocapture readonly %src, i64 %n) {
+; CHECK: @vec_sin_no_mapping
+; CHECK: call fast <2 x float> @llvm.sin.v2f32
+; CHECK-NOT: @llvm.sin.nxv2f32
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %i.07 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+  %arrayidx = getelementptr inbounds float, float* %src, i64 %i.07
+  %0 = load float, float* %arrayidx, align 4
+  %1 = tail call fast float @llvm.sin.f32(float %0)
+  %arrayidx1 = getelementptr inbounds float, float* %dst, i64 %i.07
+  store float %1, float* %arrayidx1, align 4
+  %inc = add nuw nsw i64 %i.07, 1
+  %exitcond.not = icmp eq i64 %inc, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !1
+
+for.cond.cleanup:                                 ; preds = %for.body
+  ret void
+}
+
 declare double @foo(double)
 declare i64 @bar(i64*)
 declare double @llvm.sin.f64(double)
+declare float @llvm.sin.f32(float)
 
 declare <vscale x 2 x double> @foo_vec(<vscale x 2 x double>)
 declare <vscale x 2 x i64> @bar_vec(<vscale x 2 x i64*>)
-declare <vscale x 2 x double> @sin_vec(<vscale x 2 x double>)
+declare <vscale x 2 x double> @sin_vec_nxv2f64(<vscale x 2 x double>)
+declare <2 x double> @sin_vec_v2f64(<2 x double>)
 
 attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_foo(foo_vec)" }
 attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_bar(bar_vec)" }
-attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec)" }
+attributes #2 = { "vector-function-abi-variant"="_ZGV_LLVM_Nxv_llvm.sin.f64(sin_vec_nxv2f64)" }
+attributes #3 = { "vector-function-abi-variant"="_ZGV_LLVM_N2v_llvm.sin.f64(sin_vec_v2f64)" }
 
 !1 = distinct !{!1, !2, !3}
 !2 = !{!"llvm.loop.vectorize.width", i32 2}
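The remark added by this patch can also be reproduced outside of lit with the
same flags as the RUN lines above (input.ll is a placeholder for a file
containing the vec_sin_no_mapping loop):

    opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu \
        -scalable-vectorization=on -pass-remarks-missed=loop-vectorize input.ll

which should print "Scalable vectorization not supported for the call
instruction (llvm.sin.f32) found in this loop".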