diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -654,9 +654,9 @@
   }
 
   /// Return an expression for sizeof ScalableTy that is type IntTy, where
-  /// ScalableTy is a scalable vector type.
-  const SCEV *getSizeOfScalableVectorExpr(Type *IntTy,
-                                          ScalableVectorType *ScalableTy);
+  /// ScalableTy is a scalable vector type or an AArch64 predicate-as-counter
+  /// (opaque) type.
+  const SCEV *getSizeOfScalableTypeExpr(Type *IntTy, Type *ScalableTy);
 
   /// Return an expression for the alloc size of AllocTy that is type IntTy
   const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h
--- a/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -122,7 +122,7 @@
     /// Test if the given EVT has zero size, this will fail if called on a
     /// scalable type
     bool isZeroSized() const {
-      return !isScalableVector() && getSizeInBits() == 0;
+      return getSizeInBits().getKnownMinValue() == 0;
     }
 
     /// Test if the given EVT is simple (as opposed to being extended).
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -274,7 +274,7 @@
     // If it's a primitive, it is always sized.
     if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
         getTypeID() == PointerTyID || getTypeID() == X86_MMXTyID ||
-        getTypeID() == X86_AMXTyID)
+        getTypeID() == X86_AMXTyID || getTypeID() == AArch64SvcountTyID)
       return true;
     // If it is not something that can have a size (e.g. a function or label),
     // it doesn't have a size.
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -281,9 +281,10 @@
       externref      = 184,    // WebAssembly's externref type
       x86amx         = 185,    // This is an X86 AMX value
       i64x8          = 186,    // 8 Consecutive GPRs (AArch64)
+      aarch64svcount = 187,    // AArch64 predicate-as-counter
 
       FIRST_VALUETYPE =  1,    // This is always the beginning of the list.
-      LAST_VALUETYPE = i64x8,  // This always remains at the end of the list.
+      LAST_VALUETYPE = aarch64svcount, // This always remains at the end of the list.
       VALUETYPE_SIZE = LAST_VALUETYPE + 1,
 
       // This is the current maximum for LAST_VALUETYPE.
@@ -927,6 +928,7 @@
       case v2i8:
       case v1i16:
       case v1f16: return TypeSize::Fixed(16);
+      case aarch64svcount:
       case nxv16i1:
       case nxv2i8:
       case nxv1i16:
diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp
--- a/llvm/lib/Analysis/Loads.cpp
+++ b/llvm/lib/Analysis/Loads.cpp
@@ -202,7 +202,7 @@
     const TargetLibraryInfo *TLI) {
   // For unsized types or scalable vectors we don't know exactly how many bytes
   // are dereferenced, so bail out.
-  if (!Ty->isSized() || isa<ScalableVectorType>(Ty))
+  if (!Ty->isSized() || isa<ScalableVectorType>(Ty) || Ty->isAArch64SvcountTy())
     return false;
 
   // When dereferenceability information is provided by a dereferenceable
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4250,8 +4250,10 @@
 }
 
 const SCEV *
-ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy,
-                                             ScalableVectorType *ScalableTy) {
+ScalarEvolution::getSizeOfScalableTypeExpr(Type *IntTy, Type *ScalableTy) {
+  assert((isa<ScalableVectorType>(ScalableTy) ||
+          ScalableTy->isAArch64SvcountTy()) &&
+         "Expected a scalable type");
   Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo());
   Constant *One = ConstantInt::get(IntTy, 1);
   Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One);
@@ -4262,8 +4264,9 @@
 }
 
 const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
-  if (auto *ScalableAllocTy = dyn_cast<ScalableVectorType>(AllocTy))
-    return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy);
+  if (isa<ScalableVectorType>(AllocTy) || AllocTy->isAArch64SvcountTy())
+    return getSizeOfScalableTypeExpr(IntTy, AllocTy);
+
   // We can bypass creating a target-independent constant expression and then
   // folding it back into a ConstantInt. This is just a compile-time
   // optimization.
@@ -4271,8 +4274,9 @@
 }
 
 const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) {
-  if (auto *ScalableStoreTy = dyn_cast<ScalableVectorType>(StoreTy))
-    return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy);
+  if (isa<ScalableVectorType>(StoreTy) || StoreTy->isAArch64SvcountTy())
+    return getSizeOfScalableTypeExpr(IntTy, StoreTy);
+
   // We can bypass creating a target-independent constant expression and then
   // folding it back into a ConstantInt. This is just a compile-time
   // optimization.
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -7696,7 +7696,7 @@
   // whereas scalable vectors would have to be shifted by
   // <2log(vscale) + number of bits> in order to store the
   // low/high parts. Bailing out for now.
-  if (isa<ScalableVectorType>(StoreType))
+  if (isa<ScalableVectorType>(StoreType) || StoreType->isAArch64SvcountTy())
     return false;
 
   if (!DL.typeSizeEqualsStoreSize(StoreType) ||
diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp
--- a/llvm/lib/CodeGen/LowLevelType.cpp
+++ b/llvm/lib/CodeGen/LowLevelType.cpp
@@ -31,7 +31,7 @@
     return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
   }
 
-  if (Ty.isSized()) {
+  if (Ty.isSized() && !Ty.isAArch64SvcountTy()) {
     // Aggregates are no different from real scalars as far as GlobalISel is
     // concerned.
     auto SizeInBits = DL.getTypeSizeInBits(&Ty);
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17161,6 +17161,9 @@
   SDValue Chain = LD->getChain();
   SDValue Ptr   = LD->getBasePtr();
 
+  if (N->getValueType(0) == MVT::aarch64svcount)
+    return SDValue();
+
   // If load is not volatile and there are no uses of the loaded value (and
   // the updated indexed value in case of indexed loads), change uses of the
   // chain value into uses of the chain input (i.e. delete the dead load).
@@ -19385,6 +19388,9 @@
   SDValue Value = ST->getValue();
   SDValue Ptr   = ST->getBasePtr();
 
+  if (Value.getValueType() == MVT::aarch64svcount)
+    return SDValue();
+
   // If this is a store of a bit convert, store the input value if the
   // resultant store does not need a higher alignment than the original.
   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -494,7 +494,6 @@
     return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                 CallConv);
 
-  unsigned PartBits = PartVT.getSizeInBits();
   unsigned OrigNumParts = NumParts;
   assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
          "Copying to an illegal type!");
@@ -510,6 +509,7 @@
     return;
   }
 
+  unsigned PartBits = PartVT.getSizeInBits();
   if (NumParts * PartBits > ValueVT.getSizeInBits()) {
     // If the parts cover more bits than the value has, promote the value.
     if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -173,6 +173,8 @@
   case MVT::Untyped:   return "Untyped";
   case MVT::funcref:   return "funcref";
   case MVT::externref: return "externref";
+  case MVT::aarch64svcount:
+    return "aarch64svcount";
   }
 }
 
@@ -202,6 +204,8 @@
   case MVT::f128:    return Type::getFP128Ty(Context);
   case MVT::ppcf128: return Type::getPPC_FP128Ty(Context);
   case MVT::x86mmx:  return Type::getX86_MMXTy(Context);
+  case MVT::aarch64svcount:
+    return Type::getAArch64SvcountTy(Context);
   case MVT::x86amx:  return Type::getX86_AMXTy(Context);
   case MVT::i64x8:   return IntegerType::get(Context, 512);
   case MVT::externref:
@@ -557,6 +561,8 @@
   case Type::DoubleTyID:    return MVT(MVT::f64);
   case Type::X86_FP80TyID:  return MVT(MVT::f80);
   case Type::X86_MMXTyID:   return MVT(MVT::x86mmx);
+  case Type::AArch64SvcountTyID:
+    return MVT(MVT::aarch64svcount);
   case Type::X86_AMXTyID:   return MVT(MVT::x86amx);
   case Type::FP128TyID:     return MVT(MVT::f128);
   case Type::PPC_FP128TyID: return MVT(MVT::ppcf128);
diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp
--- a/llvm/lib/IR/DataLayout.cpp
+++ b/llvm/lib/IR/DataLayout.cpp
@@ -797,6 +797,7 @@
     // layout.
     return Align(PowerOf2Ceil(BitWidth / 8));
   }
+  case Type::AArch64SvcountTyID:
   case Type::X86_MMXTyID:
   case Type::FixedVectorTyID:
   case Type::ScalableVectorTyID: {
diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp
--- a/llvm/lib/Support/LowLevelType.cpp
+++ b/llvm/lib/Support/LowLevelType.cpp
@@ -21,7 +21,7 @@
     init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector,
          VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(),
          /*AddressSpace=*/0);
-  } else if (VT.isValid()) {
+  } else if (VT.isValid() && VT != MVT::aarch64svcount) {
     // Aggregates are no different from real scalars as far as GlobalISel is
     // concerned.
     init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true,
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -82,9 +82,9 @@
             nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
            CCPassIndirect<i64>>,
 
-  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
            CCAssignToReg<[P0, P1, P2, P3]>>,
-  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
            CCPassIndirect<i64>>,
 
   // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers,
@@ -149,7 +149,7 @@
             nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
            CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>,
 
-  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+  CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount],
            CCAssignToReg<[P0, P1, P2, P3]>>
 ]>;
 
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -375,6 +375,10 @@
     }
   }
 
+  if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) {
+    addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass);
+  }
+
   // Compute derived properties from the register classes
   computeRegisterProperties(Subtarget->getRegisterInfo());
 
@@ -6110,6 +6114,9 @@
                RegVT.getVectorElementType() == MVT::i1) {
         FuncInfo->setIsSVECC(true);
         RC = &AArch64::PPRRegClass;
+      } else if (RegVT == MVT::aarch64svcount) {
+        FuncInfo->setIsSVECC(true);
+        RC = &AArch64::PPRRegClass;
       } else if (RegVT.isScalableVector()) {
         FuncInfo->setIsSVECC(true);
         RC = &AArch64::ZPRRegClass;
@@ -6145,6 +6152,7 @@
         break;
       case CCValAssign::Indirect:
         assert((VA.getValVT().isScalableVector() ||
+                VA.getValVT() == MVT::aarch64svcount ||
                 Subtarget->isWindowsArm64EC()) &&
                "Indirect arguments should be scalable on most subtargets");
         break;
@@ -6225,9 +6233,10 @@
     }
 
     if (VA.getLocInfo() == CCValAssign::Indirect) {
-      assert(
-          (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) &&
-          "Indirect arguments should be scalable on most subtargets");
+      assert((VA.getValVT().isScalableVector() ||
+              VA.getValVT() == MVT::aarch64svcount ||
+              Subtarget->isWindowsArm64EC()) &&
+             "Indirect arguments should be scalable on most subtargets");
 
       uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
       unsigned NumParts = 1;
@@ -7088,7 +7097,8 @@
       Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
       break;
     case CCValAssign::Indirect:
-      bool isScalable = VA.getValVT().isScalableVector();
+      bool isScalable = VA.getValVT().isScalableVector() ||
+                        VA.getValVT() == MVT::aarch64svcount;
       assert((isScalable || Subtarget->isWindowsArm64EC()) &&
              "Indirect arguments should be scalable on most subtargets");
 
@@ -14297,6 +14307,9 @@
     return false;
 
   // FIXME: Update this method to support scalable addressing modes.
+  if (Ty->isAArch64SvcountTy())
+    return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale;
+
   if (isa<ScalableVectorType>(Ty)) {
     uint64_t VecElemNumBytes =
         DL.getTypeSizeInBits(cast<VectorType>(Ty)->getElementType()) / 8;
@@ -21904,15 +21917,19 @@
 }
 
 bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
-  if (isa<ScalableVectorType>(Inst.getType()))
+  auto IsScalable = [](const Type *T) {
+    return isa<ScalableVectorType>(T) || T->isAArch64SvcountTy();
+  };
+
+  if (IsScalable(Inst.getType()))
     return true;
 
   for (unsigned i = 0; i < Inst.getNumOperands(); ++i)
-    if (isa<ScalableVectorType>(Inst.getOperand(i)->getType()))
+    if (IsScalable(Inst.getOperand(i)->getType()))
       return true;
 
   if (const AllocaInst *AI = dyn_cast<AllocaInst>(&Inst)) {
-    if (isa<ScalableVectorType>(AI->getAllocatedType()))
+    if (IsScalable(AI->getAllocatedType()))
       return true;
   }
 
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -873,7 +873,7 @@
 // SVE predicate register classes.
 class PPRClass<int lastreg> : RegisterClass<
                                   "AArch64",
-                                  [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16,
+                                  [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16,
                                   (sequence "P%u", 0, lastreg)> {
   let Size = 16;
 }
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2703,6 +2703,7 @@
   }
 
   defm Pat_Store_P16 : unpred_store_predicate<nxv16i1, STR_PXI>;
+  defm Pat_Store_PredAsCount : unpred_store_predicate<aarch64svcount, STR_PXI>;
 
   multiclass unpred_load_predicate<ValueType Ty, Instruction Load> {
     def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))),
@@ -2713,6 +2714,7 @@
   }
 
   defm Pat_Load_P16 : unpred_load_predicate<nxv16i1, LDR_PXI>;
+  defm Pat_Load_PredAsCount : unpred_load_predicate<aarch64svcount, LDR_PXI>;
 
   multiclass ld1<Instruction RegRegInst, Instruction RegImmInst, ValueType Ty,
                  SDPatternOperator Load, ValueType PredTy, ValueType MemVT, ComplexPattern AddrCP> {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -525,11 +525,14 @@
 }
 
 bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
+  auto IsScalable = [](const Type *T) {
+    return isa<ScalableVectorType>(T) || T->isAArch64SvcountTy();
+  };
+
   auto &F = MF.getFunction();
-  if (isa<ScalableVectorType>(F.getReturnType()))
-    return true;
-  if (llvm::any_of(F.args(), [](const Argument &A) {
-        return isa<ScalableVectorType>(A.getType());
+  if (IsScalable(F.getReturnType()) ||
+      llvm::any_of(F.args(), [&IsScalable](const Argument &A) {
+        return IsScalable(A.getType());
       }))
     return true;
   const auto &ST = MF.getSubtarget<AArch64Subtarget>();
diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -4620,7 +4620,7 @@
   // Skip alloca forms that this analysis can't handle.
   auto *AT = AI.getAllocatedType();
   if (AI.isArrayAllocation() || !AT->isSized() || isa<ScalableVectorType>(AT) ||
-      DL.getTypeAllocSize(AT).getFixedSize() == 0)
+      AT->isAArch64SvcountTy() || DL.getTypeAllocSize(AT).getFixedSize() == 0)
     return false;
 
   bool Changed = false;
diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O3 -mtriple=aarch64 -mattr=+sme -S < %s | FileCheck %s
+
+; Test that PHI nodes are allowed with aarch64_svcount values.
+define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val0, aarch64_svcount %val1, ptr %iptr, ptr %pptr, i64 %N) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 0, ptr [[IPTR:%.*]], align 4
+; CHECK-NEXT:    store aarch64_svcount [[VAL0:%.*]], ptr [[PPTR:%.*]], align 2
+; CHECK-NEXT:    [[I1_PEEL:%.*]] = icmp eq i64 [[N:%.*]], 0
+; CHECK-NEXT:    br i1 [[I1_PEEL]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]]
+; CHECK:       loop.body:
+; CHECK-NEXT:    [[IND:%.*]] = phi i64 [ [[IND_NEXT:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[IPTR_GEP:%.*]] = getelementptr i64, ptr [[IPTR]], i64 [[IND]]
+; CHECK-NEXT:    store i64 [[IND]], ptr [[IPTR_GEP]], align 4
+; CHECK-NEXT:    store aarch64_svcount [[VAL1:%.*]], ptr [[PPTR]], align 2
+; CHECK-NEXT:    [[IND_NEXT]] = add i64 [[IND]], 1
+; CHECK-NEXT:    [[I1:%.*]] = icmp eq i64 [[IND]], [[N]]
+; CHECK-NEXT:    br i1 [[I1]], label [[LOOP_EXIT]], label [[LOOP_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       loop.exit:
+; CHECK-NEXT:    [[PHI_LCSSA:%.*]] = phi aarch64_svcount [ [[VAL0]], [[ENTRY]] ], [ [[VAL1]], [[LOOP_BODY]] ]
+; CHECK-NEXT:    ret aarch64_svcount [[PHI_LCSSA]]
+;
+entry:
+  br label %loop.body
+
+loop.body:
+  %ind = phi i64 [0, %entry], [%ind.next, %loop.body]
+  %phi = phi aarch64_svcount [%val0, %entry], [%val1, %loop.body]
+  %iptr.gep = getelementptr i64, ptr %iptr, i64 %ind
+  store i64 %ind, ptr %iptr.gep
+  store aarch64_svcount %phi, ptr %pptr
+  %ind.next = add i64 %ind, 1
+  %i1 = icmp eq i64 %ind, %N
+  br i1 %i1, label %loop.exit, label %loop.body
+
+loop.exit:
+  ret aarch64_svcount %phi
+}
diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECKO0
+; RUN: llc -O3 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECKO3
+
+;
+; Test simple loads, stores and return.
+;
+define aarch64_svcount @test_load(ptr %ptr) nounwind {
+; CHECK-LABEL: test_load:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr p0, [x0]
+; CHECK-NEXT:    ret
+  %res = load aarch64_svcount, ptr %ptr
+  ret aarch64_svcount %res
+}
+
+define void @test_store(ptr %ptr, aarch64_svcount %val) nounwind {
+; CHECK-LABEL: test_store:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str p0, [x0]
+; CHECK-NEXT:    ret
+  store aarch64_svcount %val, ptr %ptr
+  ret void
+}
+
+define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind {
+; CHECKO0-LABEL: test_alloca_store_reload:
+; CHECKO0:       // %bb.0:
+; CHECKO0-NEXT:    sub sp, sp, #16
+; CHECKO0-NEXT:    add x8, sp, #14
+; CHECKO0-NEXT:    str p0, [x8]
+; CHECKO0-NEXT:    ldr p0, [x8]
+; CHECKO0-NEXT:    add sp, sp, #16
+; CHECKO0-NEXT:    ret
+;
+; CHECKO3-LABEL: test_alloca_store_reload:
+; CHECKO3:       // %bb.0:
+; CHECKO3-NEXT:    sub sp, sp, #16
+; CHECKO3-NEXT:    add x8, sp, #14
+; CHECKO3-NEXT:    str p0, [x8]
+; CHECKO3-NEXT:    add sp, sp, #16
+; CHECKO3-NEXT:    ret
+  %ptr = alloca aarch64_svcount, align 1
+  store aarch64_svcount %val, ptr %ptr
+  %res = load aarch64_svcount, ptr %ptr
+  ret aarch64_svcount %res
+}
+
+;
+; Test passing as arguments (from perspective of callee)
+;
+
+define aarch64_svcount @test_return_arg1(aarch64_svcount %arg0, aarch64_svcount %arg1) nounwind {
+; CHECK-LABEL: test_return_arg1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov p0.b, p1.b
+; CHECK-NEXT:    ret
+  ret aarch64_svcount %arg1
+}
+
+define aarch64_svcount @test_return_arg4(aarch64_svcount %arg0, aarch64_svcount %arg1, aarch64_svcount %arg2, aarch64_svcount %arg3, aarch64_svcount %arg4) nounwind {
+; CHECK-LABEL: test_return_arg4:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr p0, [x0]
+; CHECK-NEXT:    ret
+  ret aarch64_svcount %arg4
+}
+
+;
+; Test passing as arguments (from perspective of caller)
+;
+
+declare void @take_svcount_1(aarch64_svcount %arg)
+define void @test_pass_1arg(aarch64_svcount %arg) nounwind {
+; CHECK-LABEL: test_pass_1arg:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT:    bl take_svcount_1
+; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT:    ret
+  call void @take_svcount_1(aarch64_svcount %arg)
+  ret void
+}
+
+declare void @take_svcount_5(aarch64_svcount %arg0, aarch64_svcount %arg1, aarch64_svcount %arg2, aarch64_svcount %arg3, aarch64_svcount %arg4)
+define void @test_pass_5args(aarch64_svcount %arg) nounwind {
+; CHECKO0-LABEL: test_pass_5args:
+; CHECKO0:       // %bb.0:
+; CHECKO0-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECKO0-NEXT:    addvl sp, sp, #-1
+; CHECKO0-NEXT:    mov p3.b, p0.b
+; CHECKO0-NEXT:    str p3, [sp, #7, mul vl]
+; CHECKO0-NEXT:    addpl x0, sp, #7
+; CHECKO0-NEXT:    mov p0.b, p3.b
+; CHECKO0-NEXT:    mov p1.b, p3.b
+; CHECKO0-NEXT:    mov p2.b, p3.b
+; CHECKO0-NEXT:    bl take_svcount_5
+; CHECKO0-NEXT:    addvl sp, sp, #1
+; CHECKO0-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECKO0-NEXT:    ret
+;
+; CHECKO3-LABEL: test_pass_5args:
+; CHECKO3:       // %bb.0:
+; CHECKO3-NEXT:    stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECKO3-NEXT:    addvl sp, sp, #-1
+; CHECKO3-NEXT:    addpl x0, sp, #7
+; CHECKO3-NEXT:    mov p1.b, p0.b
+; CHECKO3-NEXT:    mov p2.b, p0.b
+; CHECKO3-NEXT:    mov p3.b, p0.b
+; CHECKO3-NEXT:    str p0, [sp, #7, mul vl]
+; CHECKO3-NEXT:    bl take_svcount_5
+; CHECKO3-NEXT:    addvl sp, sp, #1
+; CHECKO3-NEXT:    ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECKO3-NEXT:    ret
+  call void @take_svcount_5(aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg)
+  ret void
+}
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll
@@ -0,0 +1,12 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT:    ret aarch64_svcount [[VAL:%.*]]
+;
+  %ptr = alloca aarch64_svcount, align 1
+  store aarch64_svcount %val, ptr %ptr
+  %res = load aarch64_svcount, ptr %ptr
+  ret aarch64_svcount %res
+}
diff --git a/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -sroa -S < %s | FileCheck %s
+
+define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind {
+; CHECK-LABEL: @test_alloca_store_reload(
+; CHECK-NEXT:    [[PTR:%.*]] = alloca aarch64_svcount, align 1
+; CHECK-NEXT:    store aarch64_svcount [[VAL:%.*]], ptr [[PTR]], align 2
+; CHECK-NEXT:    [[RES:%.*]] = load aarch64_svcount, ptr [[PTR]], align 2
+; CHECK-NEXT:    ret aarch64_svcount [[RES]]
+;
+  %ptr = alloca aarch64_svcount, align 1
+  store aarch64_svcount %val, ptr %ptr
+  %res = load aarch64_svcount, ptr %ptr
+  ret aarch64_svcount %res
+}
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -77,6 +77,7 @@
   case MVT::ppcf128:  return "MVT::ppcf128";
   case MVT::x86mmx:   return "MVT::x86mmx";
   case MVT::x86amx:   return "MVT::x86amx";
+  case MVT::aarch64svcount:   return "MVT::aarch64svcount";
   case MVT::i64x8:    return "MVT::i64x8";
   case MVT::Glue:     return "MVT::Glue";
   case MVT::isVoid:   return "MVT::isVoid";