diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7188,7 +7188,6 @@
   case Instruction::Mul:
   case Instruction::FMul:
   case Instruction::FDiv:
-  case Instruction::FRem:
   case Instruction::Shl:
   case Instruction::LShr:
   case Instruction::AShr:
@@ -7221,6 +7220,66 @@
         {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
         Op2Info, Operands, I);
   }
+  case Instruction::FRem: {
+    // Certain instructions can be cheaper to vectorize if they have a constant
+    // second vector operand. One example of this is shifts on x86.
+    Value *Op2 = I->getOperand(1);
+    auto Op2Info = TTI.getOperandInfo(Op2);
+    if (Op2Info.Kind == TargetTransformInfo::OK_AnyValue &&
+        Legal->isInvariant(Op2))
+      Op2Info.Kind = TargetTransformInfo::OK_UniformValue;
+
+    SmallVector<const Value *, 4> Operands(I->operand_values());
+    InstructionCost Cost = TTI.getArithmeticInstrCost(
+        I->getOpcode(), VectorTy, CostKind,
+        {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
+        Op2Info, Operands, I);
+    if (Cost != InstructionCost::getInvalid())
+      return Cost;
+    // We need to check whether a vector library function is available, as we
+    // don't want to emit frem instructions on scalable vectors for targets on
+    // which such instructions cannot be code generated.
+    if (VF.isScalable()) {
+      if (TLI) {
+        Module *M = I->getModule();
+        StringRef ScalarFnName;
+        Type *Ty = I->getType();
+        if (Ty->isFloatTy())
+          ScalarFnName = TLI->getName(LibFunc_fmodf);
+        else if (Ty->isDoubleTy())
+          ScalarFnName = TLI->getName(LibFunc_fmod);
+        else
+          return InstructionCost::getInvalid();
+        Type *RetTy = ToVectorTy(Ty, VF);
+        SmallVector<Type *> Tys = {RetTy, RetTy};
+        Function *TLIFunc = nullptr;
+        StringRef TLIName = TLI->getVectorizedFunction(ScalarFnName, VF);
+        if (TLIName.empty()) {
+          TLIName = TLI->getVectorizedFunction(ScalarFnName, VF, /*Masked=*/true);
+          if (TLIName.empty())
+            return InstructionCost::getInvalid();
+          // Get the mask position.
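+          // Only a masked variant of the routine exists; recover the mask
+          // position from the VFABI name and mirror it in the costed type list.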
+          std::optional<VFInfo> Info =
+              VFABI::tryDemangleForVFABI(TLIName, *M, VF);
+          if (!Info)
+            return InstructionCost::getInvalid();
+          unsigned MaskPos = Info->getParamIndexForOptionalMask().value();
+          Tys.insert(Tys.begin() + MaskPos,
+                     VectorType::get(Type::getInt1Ty(M->getContext()), VF));
+        }
+        TLIFunc = Function::Create(FunctionType::get(RetTy, Tys, false),
+                                   Function::ExternalLinkage, ScalarFnName, *M);
+        if (TLIFunc == nullptr)
+          return InstructionCost::getInvalid();
+        return TTI.getCallInstrCost(TLIFunc, RetTy, Tys,
+                                    TTI::TCK_RecipThroughput);
+      }
+      return InstructionCost::getInvalid();
+    }
+    return InstructionCost::getInvalid();
+  }
   case Instruction::FNeg: {
     return TTI.getArithmeticInstrCost(
         I->getOpcode(), VectorTy, CostKind,
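With -vector-library=sleefgnuabi the scalable fmod/fmodf mappings are masked,
so the lookup above is expected to take the Masked=true path: the VFABI name is
demangled to find the mask position and an i1 vector type is inserted into the
type list before costing. As an illustrative sketch only (the mangled name and
mask position are whatever the TLI mapping and demangler actually report), for
VF = vscale x 2 the prototype handed to getCallInstrCost would correspond to a
declaration like:

  declare <vscale x 2 x double> @_ZGVsMxvv_fmod(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x i1>)
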
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/frem.ll b/llvm/test/Transforms/LoopVectorize/AArch64/frem.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/frem.ll
@@ -0,0 +1,112 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -mtriple aarch64-linux-generic -mattr=+sve -vector-library=sleefgnuabi -passes=loop-vectorize,instcombine -S < %s | FileCheck %s
+
+define void @fmod_vec(ptr noalias nocapture %a,
+; CHECK-LABEL: define void @fmod_vec
+; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], ptr noalias nocapture readonly [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x double>, ptr [[TMP0]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds double, ptr [[TMP0]], i64 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 2 x double>, ptr [[TMP3]], align 8
+; CHECK-NEXT:    [[TMP4:%.*]] = frem fast <vscale x 2 x double> [[WIDE_LOAD]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x40091EB860000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = frem fast <vscale x 2 x double> [[WIDE_LOAD1]], shufflevector (<vscale x 2 x double> insertelement (<vscale x 2 x double> poison, double 0x40091EB860000000, i64 0), <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 2 x double> [[TMP4]], ptr [[TMP6]], align 8
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-NEXT:    store <vscale x 2 x double> [[TMP5]], ptr [[TMP9]], align 8
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+  ptr noalias nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+for.body:                                         ; preds = %entry, %for.body
+  %i = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds double, ptr %b, i64 %i
+  %0 = load double, ptr %arrayidx, align 8
+  %1 = frem fast double %0, 0x40091EB860000000
+  %arrayidx2 = getelementptr inbounds double, ptr %a, i64 %i
+  store double %1, ptr %arrayidx2, align 8
+  %inc = add nuw nsw i64 %i, 1
+  %cmp = icmp ult i64 %inc, 256
+  br i1 %cmp, label %for.body, label %for.end
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+define void @fmodf_vec(ptr noalias nocapture %a,
+; CHECK-LABEL: define void @fmodf_vec
+; CHECK-SAME: (ptr noalias nocapture [[A:%.*]], ptr noalias nocapture readonly [[B:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[INDEX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP0]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP2:%.*]] = shl nuw nsw i64 [[TMP1]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, ptr [[TMP0]], i64 [[TMP2]]
+; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[TMP3]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = frem fast <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x40091EB860000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP5:%.*]] = frem fast <vscale x 4 x float> [[WIDE_LOAD1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0x40091EB860000000, i64 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP4]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP8:%.*]] = shl nuw nsw i64 [[TMP7]], 2
+; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, ptr [[TMP6]], i64 [[TMP8]]
+; CHECK-NEXT:    store <vscale x 4 x float> [[TMP5]], ptr [[TMP9]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP11:%.*]] = shl nuw nsw i64 [[TMP10]], 3
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
+; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br i1 poison, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+  ptr noalias nocapture readonly %b) #0 {
+entry:
+  br label %for.body
+for.body:                                         ; preds = %entry, %for.body
+  %i = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %arrayidx = getelementptr inbounds float, ptr %b, i64 %i
+  %0 = load float, ptr %arrayidx, align 4
+  %1 = frem fast float %0, 0x40091EB860000000
+  %arrayidx2 = getelementptr inbounds float, ptr %a, i64 %i
+  store float %1, ptr %arrayidx2, align 4
+  %inc = add nuw nsw i64 %i, 1
+  %cmp = icmp ult i64 %inc, 256
+  br i1 %cmp, label %for.body, label %for.end
+for.end:                                          ; preds = %for.body
+  ret void
+}
+
+attributes #0 = { vscale_range(1,16) }