diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -644,6 +644,9 @@
 TLI_DEFINE_VECFUNC("exp10", "_ZGVsMxv_exp10", SCALABLE(2), MASKED)
 TLI_DEFINE_VECFUNC("exp10f", "_ZGVsMxv_exp10f", SCALABLE(4), MASKED)
 
+TLI_DEFINE_VECFUNC("fmod", "_ZGVsMxvv_fmod", SCALABLE(2), MASKED)
+TLI_DEFINE_VECFUNC("fmodf", "_ZGVsMxvv_fmodf", SCALABLE(4), MASKED)
+
 TLI_DEFINE_VECFUNC("lgamma", "_ZGVsMxv_lgamma", SCALABLE(2), MASKED)
 TLI_DEFINE_VECFUNC("lgammaf", "_ZGVsMxv_lgammaf", SCALABLE(4), MASKED)
 
diff --git a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
--- a/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
+++ b/llvm/lib/CodeGen/ReplaceWithVeclib.cpp
@@ -89,6 +89,62 @@
   return true;
 }
 
+static bool replaceInstructionWithTLIFunction(Instruction &I,
+                                              const StringRef TLIName,
+                                              bool Masked,
+                                              ElementCount NumElements,
+                                              Type *ElementType) {
+  Module *M = I.getModule();
+  IRBuilder<> IRBuilder(&I);
+
+  // Check if the vector library function is already declared in this module,
+  // otherwise insert it.
+  Function *TLIFunc = M->getFunction(TLIName);
+  if (!TLIFunc) {
+    FunctionType *FTy = nullptr;
+    Type *RetTy = I.getType();
+    if (Masked) {
+      Type *Tys[3] = {RetTy, RetTy,
+                      ToVectorTy(IRBuilder.getInt1Ty(), NumElements)};
+      FTy = FunctionType::get(RetTy, Tys, false);
+    } else {
+      Type *Tys[2] = {RetTy, RetTy};
+      FTy = FunctionType::get(RetTy, Tys, false);
+    }
+    TLIFunc = Function::Create(FTy, Function::ExternalLinkage, TLIName, *M);
+
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Added vector library function `"
+                      << TLIName << "` of type `" << *(TLIFunc->getType())
+                      << "` to module.\n");
+
+    ++NumTLIFuncDeclAdded;
+
+    // Add the freshly created function to llvm.compiler.used,
+    // similar to as it is done in InjectTLIMappings
+    appendToCompilerUsed(*M, {TLIFunc});
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Adding `" << TLIName
+                      << "` to `@llvm.compiler.used`.\n");
+    ++NumFuncUsedAdded;
+  }
+  SmallVector<Value *> Args(I.operand_values());
+  if (Masked) {
+    Value *AllActiveMask = ConstantInt::getTrue(VectorType::get(
+        IntegerType::getInt1Ty(TLIFunc->getType()->getContext()), NumElements));
+    Args.push_back(AllActiveMask);
+  }
+  CallInst *Replacement = IRBuilder.CreateCall(TLIFunc, Args);
+  I.replaceAllUsesWith(Replacement);
+  if (isa<FPMathOperator>(Replacement)) {
+    // Preserve fast math flags for FP math.
+    Replacement->copyFastMathFlags(&I);
+  }
+  Replacement->copyMetadata(I);
+  LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" << I.getOpcodeName()
+                    << "` with call to `" << TLIName << "`.\n");
+  ++NumCallsReplaced;
+  return true;
+}
+
 static bool replaceWithCallToVeclib(const TargetLibraryInfo &TLI,
                                     CallInst &CI) {
   if (!CI.getCalledFunction()) {
@@ -173,21 +229,72 @@
   return false;
 }
 
+static bool replaceInstructionWithCallToVeclib(const TargetLibraryInfo &TLI,
+                                               Instruction &I) {
+  // We only have TLI mappings for SVE.
+  if (!I.getType()->isScalableTy()) {
+    return false;
+  }
+  auto *VectorArgTy = dyn_cast<VectorType>(I.getType());
+  if (!VectorArgTy) {
+    return false;
+  }
+  ElementCount NumElements = VectorArgTy->getElementCount();
+  Type *ElementType = VectorArgTy->getElementType();
+  StringRef ScalarName =
+      (ElementType->isFloatTy())
+          ? TLI.getName(LibFunc_fmodf)
+          : ((ElementType->isDoubleTy()) ? TLI.getName(LibFunc_fmod) : "");
+  if (!ScalarName.empty()) {
+    if (!TLI.isFunctionVectorizable(ScalarName)) {
+      // The TargetLibraryInfo does not contain a vectorized version of
+      // the scalar function.
+      return false;
+    }
+    const std::string TLINameUnmasked =
+        std::string(TLI.getVectorizedFunction(ScalarName, NumElements));
+    const std::string TLINameMasked =
+        std::string(TLI.getVectorizedFunction(ScalarName, NumElements, true));
+    LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Looking up TLI mapping for `"
+                      << ScalarName << "` and vector width " << NumElements
+                      << ".\n");
+    if (!TLINameUnmasked.empty()) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found unmasked TLI function `"
+                        << TLINameUnmasked << "`.\n");
+      return replaceInstructionWithTLIFunction(I, TLINameUnmasked, false,
+                                               NumElements, ElementType);
+    } else if (!TLINameMasked.empty()) {
+      LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Found masked TLI function `"
+                        << TLINameMasked << "`.\n");
+      return replaceInstructionWithTLIFunction(I, TLINameMasked, true,
+                                               NumElements, ElementType);
+    }
+  }
+  return false;
+}
+
 static bool runImpl(const TargetLibraryInfo &TLI, Function &F) {
   bool Changed = false;
-  SmallVector<CallInst *> ReplacedCalls;
+  SmallVector<Instruction *> ReplacedCalls;
   for (auto &I : instructions(F)) {
     if (auto *CI = dyn_cast<CallInst>(&I)) {
       if (replaceWithCallToVeclib(TLI, *CI)) {
-        ReplacedCalls.push_back(CI);
+        ReplacedCalls.push_back(&I);
+        Changed = true;
+      }
+    } else if (I.getOpcode() == Instruction::FRem) {
+      // If there is a suitable TLI mapping for FRem instruction,
+      // replace the instruction.
+      if (replaceInstructionWithCallToVeclib(TLI, I)) {
+        ReplacedCalls.push_back(&I);
         Changed = true;
       }
     }
   }
-  // Erase the calls to the intrinsics that have been replaced
-  // with calls to the vector library.
-  for (auto *CI : ReplacedCalls) {
-    CI->eraseFromParent();
+  // Erase the calls to the intrinsics and the instructions that have been
+  // replaced with calls to the vector library.
+  for (auto *I : ReplacedCalls) {
+    I->eraseFromParent();
   }
   return Changed;
 }
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-armpl.ll
@@ -377,4 +377,26 @@
   ret <vscale x 4 x float> %1
 }
 
+; NOTE: TLI mappings for FREM instruction.
+
+define <vscale x 2 x double> @frem_vscale_f64(<vscale x 2 x double> %in) #0 {
+; CHECK-LABEL: define <vscale x 2 x double> @frem_vscale_f64
+; CHECK-SAME: (<vscale x 2 x double> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @armpl_svfmod_f64_x(<vscale x 2 x double> [[IN]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 7.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %out = frem <vscale x 2 x double> %in, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 7.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @frem_vscale_f32(<vscale x 4 x float> %in) #0 {
+; CHECK-LABEL: define <vscale x 4 x float> @frem_vscale_f32
+; CHECK-SAME: (<vscale x 4 x float> [[IN:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @armpl_svfmod_f32_x(<vscale x 4 x float> [[IN]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 7.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %out = frem <vscale x 4 x float> %in, shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 7.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x float> %out
+}
+
 attributes #0 = { "target-features"="+sve" }
diff --git a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
--- a/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/replace-intrinsics-with-veclib-sleef-scalable.ll
@@ -365,6 +365,26 @@
   ret <vscale x 4 x float> %1
 }
 
+; NOTE: TLI mapping for FREM instruction.
+
+define <vscale x 2 x double> @frem_vscale_f64(<vscale x 2 x double> %in) {
+; CHECK-LABEL: @frem_vscale_f64(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[IN:%.*]], <vscale x 2 x double> shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 7.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer), <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 2 x double> [[TMP1]]
+;
+  %out = frem <vscale x 2 x double> %in, shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 7.000000e+00, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+  ret <vscale x 2 x double> %out
+}
+
+define <vscale x 4 x float> @frem_vscale_f32(<vscale x 4 x float> %in) {
+; CHECK-LABEL: @frem_vscale_f32(
+; CHECK-NEXT:    [[TMP1:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[IN:%.*]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 7.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
+; CHECK-NEXT:    ret <vscale x 4 x float> [[TMP1]]
+;
+  %out = frem <vscale x 4 x float> %in, shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 7.000000e+00, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+  ret <vscale x 4 x float> %out
+}
+
 declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
 declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.copysign.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sleef-calls-aarch64.ll
@@ -1,6 +1,6 @@
 ; Do NOT use -O3. It will lower exp2 to ldexp, and the test will fail.
-; RUN: opt -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,NEON
-; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -replace-with-veclib < %s | opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S | FileCheck %s --check-prefixes=CHECK,SVE
+; RUN: opt -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,NEON
+; RUN: opt -mattr=+sve -vector-library=sleefgnuabi -passes=inject-tli-mappings,loop-unroll,loop-vectorize -S < %s | FileCheck %s --check-prefixes=CHECK,SVE
 
 target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-unknown-linux-gnu"
 
@@ -535,6 +535,55 @@
   ret void
 }
 
+declare double @fmod(double, double) #0
+declare float @fmodf(float, float) #0
+
+define void @fmod_f64(double* nocapture %varray) {
+  ; CHECK-LABEL: @fmod_f64(
+  ; SVE: [[TMP5:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double> [[TMP4:%.*]], <vscale x 2 x double> [[TMP4:%.*]], {{.*}})
+  ; CHECK: ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @fmod(double %conv, double %conv)
+  %arrayidx = getelementptr inbounds double, double* %varray, i64 %iv
+  store double %call, double* %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
+define void @fmod_f32(float* nocapture %varray) {
+  ; CHECK-LABEL: @fmod_f32(
+  ; SVE: [[TMP5:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_fmodf(<vscale x 4 x float> [[TMP4:%.*]], <vscale x 4 x float> [[TMP4:%.*]], {{.*}})
+  ; CHECK: ret void
+  ;
+  entry:
+  br label %for.body
+
+  for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to float
+  %call = tail call float @fmodf(float %conv, float %conv)
+  %arrayidx = getelementptr inbounds float, float* %varray, i64 %iv
+  store float %call, float* %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+  for.end:
+  ret void
+}
+
 declare double @lgamma(double) #0
 declare float @lgammaf(float) #0
 declare double @llvm.lgamma.f64(double) #0