diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5585,6 +5585,7 @@
   }
   SmallVector<SDValue, 16> ArgValues;
   unsigned ExtraArgLocs = 0;
+  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i - ExtraArgLocs];
 
@@ -5627,11 +5628,13 @@
       else if (RegVT == MVT::f128 || RegVT.is128BitVector())
         RC = &AArch64::FPR128RegClass;
       else if (RegVT.isScalableVector() &&
-               RegVT.getVectorElementType() == MVT::i1)
+               RegVT.getVectorElementType() == MVT::i1) {
+        FuncInfo->setSVE(true);
         RC = &AArch64::PPRRegClass;
-      else if (RegVT.isScalableVector())
+      } else if (RegVT.isScalableVector()) {
+        FuncInfo->setSVE(true);
         RC = &AArch64::ZPRRegClass;
-      else
+      } else
         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
 
       // Transform the arguments in physical registers into virtual ones.
@@ -5768,7 +5771,6 @@
   assert((ArgLocs.size() + ExtraArgLocs) == Ins.size());
 
   // varargs
-  AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
   if (isVarArg) {
     if (!Subtarget->isTargetDarwin() || IsWin64) {
       // The AAPCS variadic function ABI is identical to the non-variadic
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -175,6 +175,8 @@
   /// The stack slot where the Swift asynchronous context is stored.
   int SwiftAsyncContextFrameIdx = std::numeric_limits<int>::max();
 
+  bool IsSVE = false;
+
   /// True if the function needs unwind information.
   mutable Optional<bool> NeedsDwarfUnwindInfo;
 
@@ -184,6 +186,9 @@
 public:
   explicit AArch64FunctionInfo(MachineFunction &MF);
 
+  bool isSVE() const { return IsSVE; }
+  void setSVE(bool s) { IsSVE = s; }
+
   void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI);
 
   unsigned getBytesInStackArgArea() const { return BytesInStackArgArea; }
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -67,11 +67,8 @@
 }
 
 bool AArch64RegisterInfo::hasSVEArgsOrReturn(const MachineFunction *MF) {
-  const Function &F = MF->getFunction();
-  return isa<ScalableVectorType>(F.getReturnType()) ||
-         any_of(F.args(), [](const Argument &Arg) {
-           return isa<ScalableVectorType>(Arg.getType());
-         });
+  return (MF->getInfo<AArch64FunctionInfo>()->isSVE() ||
+          isa<ScalableVectorType>(MF->getFunction().getReturnType()));
 }
 
 const MCPhysReg *
diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
--- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
+++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll
@@ -188,6 +188,193 @@
   ret double %x0
 }
 
+; Use AAVPCS, SVE register in z0-z7 used
+
+define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, i32 * %ptr) nounwind {
+; CHECK-LABEL: aavpcs1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x8, x9, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x7]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast i32 * %ptr to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> %s7, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s8, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s9, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s11, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s12, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s13, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s14, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s15, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s16, <vscale x 4 x i32>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, SVE register in z0-z7 used
+
+define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, float * %ptr) nounwind {
+; CHECK-LABEL: aavpcs2:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldp x8, x9, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x0]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s7, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used
+
+define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 4 x i1> %p0, float * %ptr) nounwind {
+; CHECK-LABEL: aavpcs3:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
+; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr x8, [sp, #16]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s10, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
+; Use AAVPCS, SVE register in z0-z7 used (i32s don't occupy z0-z7)
+
+define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, i32 * %ptr) nounwind {
+; CHECK-LABEL: aavpcs4:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ldr x9, [sp, #16]
+; CHECK-NEXT:    ld1w { z24.s }, p0/z, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x9]
+; CHECK-NEXT:    st1w { z24.s }, p0, [x9]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast i32 * %ptr to <vscale x 4 x i32>*
+  store volatile <vscale x 4 x i32> %s8, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s9, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s10, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s11, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s12, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s13, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s14, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s15, <vscale x 4 x i32>* %ptr1.bc
+  store volatile <vscale x 4 x i32> %s16, <vscale x 4 x i32>* %ptr1.bc
+  ret void
+}
+
+; Use AAPCS, no SVE register in z0-z7 used (floats occupy z0-z7)
+
+define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, float * %ptr) nounwind {
+; CHECK-LABEL: aapcs1:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    ldr x8, [sp]
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x8]
+; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x7]
+; CHECK-NEXT:    ld1w { z2.s }, p0/z, [x6]
+; CHECK-NEXT:    ld1w { z3.s }, p0/z, [x5]
+; CHECK-NEXT:    ld1w { z4.s }, p0/z, [x4]
+; CHECK-NEXT:    ld1w { z5.s }, p0/z, [x3]
+; CHECK-NEXT:    ld1w { z6.s }, p0/z, [x2]
+; CHECK-NEXT:    ld1w { z7.s }, p0/z, [x1]
+; CHECK-NEXT:    ld1w { z16.s }, p0/z, [x0]
+; CHECK-NEXT:    ldr x8, [sp, #16]
+; CHECK-NEXT:    st1w { z16.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z7.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z6.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z5.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z4.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z3.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z2.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z1.s }, p0, [x8]
+; CHECK-NEXT:    st1w { z0.s }, p0, [x8]
+; CHECK-NEXT:    ret
+entry:
+  %ptr1.bc = bitcast float * %ptr to <vscale x 4 x float>*
+  store volatile <vscale x 4 x float> %s8, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s9, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s10, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s11, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s12, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s13, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s14, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s15, <vscale x 4 x float>* %ptr1.bc
+  store volatile <vscale x 4 x float> %s16, <vscale x 4 x float>* %ptr1.bc
+  ret void
+}
+
 declare float @callee1(float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
 declare float @callee2(i32, i32, i32, i32, i32, i32, i32, i32, float, <vscale x 8 x double>, <vscale x 8 x double>)
 declare float @callee3(float, float, <vscale x 8 x double>, <vscale x 8 x double>, <vscale x 8 x double>)
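Note on the mechanism, with a minimal sketch that is not part of the patch: when LowerFormalArguments assigns a scalable-vector or predicate formal argument to a ZPR/PPR register class, it now records setSVE(true) on AArch64FunctionInfo, so AArch64RegisterInfo::hasSVEArgsOrReturn can consult the cached flag (plus the return type) instead of re-scanning F.args(). The function name below is hypothetical:

; Lowering %v selects the ZPR register class, which sets the cached IsSVE
; flag; hasSVEArgsOrReturn() then returns true for this function without
; walking the IR argument list.
define <vscale x 4 x i32> @sve_arg_sketch(<vscale x 4 x i32> %v) {
  ret <vscale x 4 x i32> %v
}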