Index: clang/include/clang/AST/ASTContext.h
===================================================================
--- clang/include/clang/AST/ASTContext.h
+++ clang/include/clang/AST/ASTContext.h
@@ -2086,10 +2086,6 @@
     return getTypeSizeInCharsIfKnown(QualType(Ty, 0));
   }
 
-  /// Returns the bitwidth of \p T, an SVE type attributed with
-  /// 'arm_sve_vector_bits'. Should only be called if T->isVLST().
-  unsigned getBitwidthForAttributedSveType(const Type *T) const;
-
   /// Return the ABI-specified alignment of a (complete) type \p T, in
   /// bits.
   unsigned getTypeAlign(QualType T) const { return getTypeInfo(T).Align; }
Index: clang/include/clang/AST/Type.h
===================================================================
--- clang/include/clang/AST/Type.h
+++ clang/include/clang/AST/Type.h
@@ -1925,14 +1925,16 @@
   bool isSizelessType() const;
   bool isSizelessBuiltinType() const;
 
-  /// Determines if this is a vector-length-specific type (VLST), i.e. a
-  /// sizeless type with the 'arm_sve_vector_bits' attribute applied.
-  bool isVLST() const;
-
   /// Determines if this is a sizeless type supported by the
   /// 'arm_sve_vector_bits' type attribute, which can be applied to a single
   /// SVE vector or predicate, excluding tuple types such as svint32x4_t.
   bool isVLSTBuiltinType() const;
 
+  /// Returns the representative type for the element of an SVE builtin type.
+  /// This is used to represent fixed-length SVE vectors created with the
+  /// 'arm_sve_vector_bits' type attribute as VectorType.
+  QualType getFixedLengthSveEltType(const ASTContext &Ctx) const;
+
   /// Types are partitioned into 3 broad categories (C99 6.2.5p1):
   /// object types, function types, and incomplete types.
@@ -3258,7 +3260,11 @@
     NeonVector,
 
     /// is ARM Neon polynomial vector
-    NeonPolyVector
+    NeonPolyVector,
+
+    /// is ARM fixed-length scalable vector
+    SveFixedLengthDataVector,
+    SveFixedLengthPredicateVector
   };
 
 protected:
Index: clang/include/clang/Basic/Attr.td
===================================================================
--- clang/include/clang/Basic/Attr.td
+++ clang/include/clang/Basic/Attr.td
@@ -1538,6 +1538,8 @@
   let Args = [UnsignedArgument<"NumBits">];
   let Documentation = [ArmSveVectorBitsDocs];
   let PragmaAttributeSupport = 0;
+  // Represented as VectorType instead.
+  let ASTNode = 0;
 }
 
 def ArmMveStrictPolymorphism : TypeAttr, TargetSpecificAttr<TargetARM> {
Index: clang/include/clang/Basic/DiagnosticSemaKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -2921,6 +2921,8 @@
   "vector size not an integral multiple of component size">;
 def err_attribute_zero_size : Error<"zero %0 size">;
 def err_attribute_size_too_large : Error<"%0 size too large">;
+def err_typecheck_vector_not_convertable_sizeless : Error<
+  "cannot convert between fixed-length and sizeless vector (%0 and %1)">;
 def err_typecheck_vector_not_convertable_implict_truncation : Error<
   "cannot convert between %select{scalar|vector}0 type %1 and vector type"
   " %2 as implicit conversion would cause truncation">;
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -1997,10 +1997,7 @@
   bool RequireCompleteSizedType(SourceLocation Loc, QualType T,
                                 unsigned DiagID, const Ts &...
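With ASTNode = 0 the attribute no longer produces an AttributedType: a type written with arm_sve_vector_bits is built directly as a VectorType whose vector kind records whether it is a data vector or a predicate. A minimal usage sketch, not part of the patch, assuming an SVE-enabled target and -msve-vector-bits=512 (so __ARM_FEATURE_SVE_BITS_EXPERIMENTAL is 512):

#include <arm_sve.h>

#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));

// Under this patch, fixed_int32_t becomes a 16-element VectorType of 'int'
// with kind SveFixedLengthDataVector, and fixed_bool_t becomes an 8-element
// VectorType of 'unsigned char' with kind SveFixedLengthPredicateVector.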
Args) { SizelessTypeDiagnoser Diagnoser(DiagID, Args...); - CompleteTypeKind Kind = CompleteTypeKind::Normal; - if (T->isVLST()) - Kind = CompleteTypeKind::AcceptSizeless; - return RequireCompleteType(Loc, T, Kind, Diagnoser); + return RequireCompleteType(Loc, T, CompleteTypeKind::Normal, Diagnoser); } void completeExprArrayBound(Expr *E); @@ -2018,10 +2015,7 @@ bool RequireCompleteSizedExprType(Expr *E, unsigned DiagID, const Ts &... Args) { SizelessTypeDiagnoser Diagnoser(DiagID, Args...); - CompleteTypeKind Kind = CompleteTypeKind::Normal; - if (E->getType()->isVLST()) - Kind = CompleteTypeKind::AcceptSizeless; - return RequireCompleteExprType(E, Kind, Diagnoser); + return RequireCompleteExprType(E, CompleteTypeKind::Normal, Diagnoser); } bool RequireLiteralType(SourceLocation Loc, QualType T, Index: clang/lib/AST/ASTContext.cpp =================================================================== --- clang/lib/AST/ASTContext.cpp +++ clang/lib/AST/ASTContext.cpp @@ -1869,50 +1869,6 @@ return TI; } -static unsigned getSveVectorWidth(const Type *T) { - // Get the vector size from the 'arm_sve_vector_bits' attribute via the - // AttributedTypeLoc associated with the typedef decl. - if (const auto *TT = T->getAs()) { - const TypedefNameDecl *Typedef = TT->getDecl(); - TypeSourceInfo *TInfo = Typedef->getTypeSourceInfo(); - TypeLoc TL = TInfo->getTypeLoc(); - if (AttributedTypeLoc ATL = TL.getAs()) - if (const auto *Attr = ATL.getAttrAs()) - return Attr->getNumBits(); - } - - llvm_unreachable("bad 'arm_sve_vector_bits' attribute!"); -} - -static unsigned getSvePredWidth(const ASTContext &Context, const Type *T) { - return getSveVectorWidth(T) / Context.getCharWidth(); -} - -unsigned ASTContext::getBitwidthForAttributedSveType(const Type *T) const { - assert(T->isVLST() && - "getBitwidthForAttributedSveType called for non-attributed type!"); - - switch (T->castAs()->getKind()) { - default: - llvm_unreachable("unknown builtin type!"); - case BuiltinType::SveInt8: - case BuiltinType::SveInt16: - case BuiltinType::SveInt32: - case BuiltinType::SveInt64: - case BuiltinType::SveUint8: - case BuiltinType::SveUint16: - case BuiltinType::SveUint32: - case BuiltinType::SveUint64: - case BuiltinType::SveFloat16: - case BuiltinType::SveFloat32: - case BuiltinType::SveFloat64: - case BuiltinType::SveBFloat16: - return getSveVectorWidth(T); - case BuiltinType::SveBool: - return getSvePredWidth(*this, T); - } -} - /// getTypeInfoImpl - Return the size of the specified type, in bits. This /// method does not work on incomplete types. /// @@ -1979,6 +1935,9 @@ uint64_t TargetVectorAlign = Target->getMaxVectorAlign(); if (TargetVectorAlign && TargetVectorAlign < Align) Align = TargetVectorAlign; + // Adjust the alignment for fixed-length SVE predicates. 
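With the attribute-specific width computation removed, getTypeInfo treats these as ordinary vectors: a data vector occupies N bits and a predicate N/8 bits, with predicate alignment adjusted to 16 bits in the hunk that follows. A rough sketch of the expected results, assuming -msve-vector-bits=512 (illustrative, not taken verbatim from the tests):

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
typedef svbool_t  fixed_bool_t  __attribute__((arm_sve_vector_bits(512)));

static_assert(sizeof(fixed_int32_t) == 64, "data vector occupies N/8 bytes");
static_assert(alignof(fixed_int32_t) == 16, "capped by the target's max vector alignment");
static_assert(sizeof(fixed_bool_t) == 8, "one predicate bit per data byte");
static_assert(alignof(fixed_bool_t) == 2, "predicate alignment adjusted to 16 bits");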
+ if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + Align = 16; break; } @@ -2317,10 +2276,7 @@ Align = Info.Align; AlignIsRequired = Info.AlignIsRequired; } - if (T->isVLST()) - Width = getBitwidthForAttributedSveType(T); - else - Width = Info.Width; + Width = Info.Width; break; } Index: clang/lib/AST/ItaniumMangle.cpp =================================================================== --- clang/lib/AST/ItaniumMangle.cpp +++ clang/lib/AST/ItaniumMangle.cpp @@ -531,6 +531,8 @@ void mangleNeonVectorType(const DependentVectorType *T); void mangleAArch64NeonVectorType(const VectorType *T); void mangleAArch64NeonVectorType(const DependentVectorType *T); + void mangleAArch64FixedSveVectorType(const VectorType *T); + void mangleAArch64FixedSveVectorType(const DependentVectorType *T); void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value); void mangleMemberExprBase(const Expr *base, bool isArrow); @@ -3298,6 +3300,77 @@ Diags.Report(T->getAttributeLoc(), DiagID); } +// The AArch64 ACLE specifies that fixed-length SVE vector and predicate types +// defined with the 'arm_sve_vector_bits' attribute map to the same AAPCS64 +// type as the sizeless variants. The mangling scheme is defined in the +// appendices to the Procedure Call Standard for the Arm Architecture, see: +// https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#appendix-c-mangling +void CXXNameMangler::mangleAArch64FixedSveVectorType(const VectorType *T) { + assert((T->getVectorKind() == VectorType::SveFixedLengthDataVector || + T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) && + "expected fixed-length SVE vector!"); + QualType EltType = T->getElementType(); + assert(EltType->isBuiltinType() && + "expected builtin type for fixed-length SVE vector!"); + + StringRef TypeName; + switch (cast(EltType)->getKind()) { + case BuiltinType::SChar: + TypeName = "__SVInt8_t"; + break; + case BuiltinType::UChar: { + if (T->getVectorKind() == VectorType::SveFixedLengthDataVector) + TypeName = "__SVUint8_t"; + else + TypeName = "__SVBool_t"; + break; + } + case BuiltinType::Short: + TypeName = "__SVInt16_t"; + break; + case BuiltinType::UShort: + TypeName = "__SVUint16_t"; + break; + case BuiltinType::Int: + TypeName = "__SVInt32_t"; + break; + case BuiltinType::UInt: + TypeName = "__SVUint32_t"; + break; + case BuiltinType::Long: + TypeName = "__SVInt64_t"; + break; + case BuiltinType::ULong: + TypeName = "__SVUint64_t"; + break; + case BuiltinType::Float16: + TypeName = "__SVFloat16_t"; + break; + case BuiltinType::Float: + TypeName = "__SVFloat32_t"; + break; + case BuiltinType::Double: + TypeName = "__SVFloat64_t"; + break; + case BuiltinType::BFloat16: + TypeName = "__SVBfloat16_t"; + break; + default: + llvm_unreachable("unexpected element type for fixed-length SVE vector!"); + } + + Out << 'u' << TypeName.size() << TypeName; +} + +void CXXNameMangler::mangleAArch64FixedSveVectorType( + const DependentVectorType *T) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "cannot mangle this dependent fixed-length SVE vector type yet"); + Diags.Report(T->getAttributeLoc(), DiagID); +} + // GNU extension: vector types // ::= // ::= Dv _ @@ -3318,6 +3391,10 @@ else mangleNeonVectorType(T); return; + } else if (T->getVectorKind() == VectorType::SveFixedLengthDataVector || + T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { + mangleAArch64FixedSveVectorType(T); + return; } Out << "Dv" << 
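For reference, a sketch of the manglings the new hook produces (illustrative declarations; assumes -msve-vector-bits=512): a fixed-length SVE argument mangles exactly like the corresponding sizeless ACLE type, using the vendor-extended 'u<length><name>' form described in the AAPCS64 appendix cited above.

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
typedef svbool_t  fixed_bool_t  __attribute__((arm_sve_vector_bits(512)));

void f(fixed_int32_t); // _Z1fu11__SVInt32_t, the same as void f(svint32_t)
void g(fixed_bool_t);  // _Z1gu10__SVBool_t,  the same as void g(svbool_t)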
T->getNumElements() << '_'; if (T->getVectorKind() == VectorType::AltiVecPixel) @@ -3340,6 +3417,10 @@ else mangleNeonVectorType(T); return; + } else if (T->getVectorKind() == VectorType::SveFixedLengthDataVector || + T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { + mangleAArch64FixedSveVectorType(T); + return; } Out << "Dv"; Index: clang/lib/AST/JSONNodeDumper.cpp =================================================================== --- clang/lib/AST/JSONNodeDumper.cpp +++ clang/lib/AST/JSONNodeDumper.cpp @@ -616,6 +616,9 @@ case VectorType::NeonPolyVector: JOS.attribute("vectorKind", "neon poly"); break; + case VectorType::SveFixedLengthDataVector: + case VectorType::SveFixedLengthPredicateVector: + JOS.attribute("vectorKind", "fixed-length sve"); } } Index: clang/lib/AST/TextNodeDumper.cpp =================================================================== --- clang/lib/AST/TextNodeDumper.cpp +++ clang/lib/AST/TextNodeDumper.cpp @@ -1389,6 +1389,9 @@ case VectorType::NeonPolyVector: OS << " neon poly"; break; + case VectorType::SveFixedLengthDataVector: + case VectorType::SveFixedLengthPredicateVector: + OS << " fixed-length sve"; } OS << " " << T->getNumElements(); } Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -2318,11 +2318,42 @@ return false; } -bool Type::isVLST() const { - if (!isVLSTBuiltinType()) - return false; +QualType Type::getFixedLengthSveEltType(const ASTContext &Ctx) const { + assert(isVLSTBuiltinType() && "unsupported type!"); - return hasAttr(attr::ArmSveVectorBits); + const BuiltinType *BTy = getAs(); + switch (BTy->getKind()) { + default: + llvm_unreachable("Unknown builtin SVE type!"); + case BuiltinType::SveInt8: + return Ctx.SignedCharTy; + case BuiltinType::SveUint8: + case BuiltinType::SveBool: + // Represent predicates as i8 rather than i1 to avoid any layout issues. + // The type is bitcasted to a scalable predicate type when casting between + // scalable and fixed-length vectors. + return Ctx.UnsignedCharTy; + case BuiltinType::SveInt16: + return Ctx.ShortTy; + case BuiltinType::SveUint16: + return Ctx.UnsignedShortTy; + case BuiltinType::SveInt32: + return Ctx.IntTy; + case BuiltinType::SveUint32: + return Ctx.UnsignedIntTy; + case BuiltinType::SveInt64: + return Ctx.LongTy; + case BuiltinType::SveUint64: + return Ctx.UnsignedLongTy; + case BuiltinType::SveFloat16: + return Ctx.Float16Ty; + case BuiltinType::SveBFloat16: + return Ctx.BFloat16Ty; + case BuiltinType::SveFloat32: + return Ctx.FloatTy; + case BuiltinType::SveFloat64: + return Ctx.DoubleTy; + } } bool QualType::isPODType(const ASTContext &Context) const { Index: clang/lib/AST/TypePrinter.cpp =================================================================== --- clang/lib/AST/TypePrinter.cpp +++ clang/lib/AST/TypePrinter.cpp @@ -655,6 +655,24 @@ printBefore(T->getElementType(), OS); break; } + case VectorType::SveFixedLengthDataVector: + case VectorType::SveFixedLengthPredicateVector: + // FIXME: We prefer to print the size directly here, but have no way + // to get the size of the type. + OS << "__attribute__((__arm_sve_vector_bits__("; + + if (T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + // Predicates take a bit per byte of the vector size, multiply by 8 to + // get the number of bits passed to the attribute. 
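The element mapping from getFixedLengthSveEltType drives both layout and printing; 'unsigned char' is shared by svuint8_t and svbool_t, so the vector kind is what keeps data vectors and predicates distinct. A quick reference, assuming -msve-vector-bits=512 (illustrative only):

#include <arm_sve.h>

typedef svint8_t    fixed_int8_t    __attribute__((arm_sve_vector_bits(512))); // 64 x 'signed char'
typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(512))); //  8 x 'double'
typedef svbool_t    fixed_bool_t    __attribute__((arm_sve_vector_bits(512))); //  8 x 'unsigned char'

// The printing code here renders the data vector roughly as
//   __attribute__((__arm_sve_vector_bits__(64 * sizeof(signed char) * 8))) signed char
// and scales the predicate's element count by 8 so the printed bit count still
// matches the attribute argument the user wrote.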
+ OS << T->getNumElements() * 8; + else + OS << T->getNumElements(); + + OS << " * sizeof("; + print(T->getElementType(), OS, StringRef()); + // Multiply by 8 for the number of bits. + OS << ") * 8))) "; + printBefore(T->getElementType(), OS); } } @@ -702,6 +720,24 @@ printBefore(T->getElementType(), OS); break; } + case VectorType::SveFixedLengthDataVector: + case VectorType::SveFixedLengthPredicateVector: + // FIXME: We prefer to print the size directly here, but have no way + // to get the size of the type. + OS << "__attribute__((__arm_sve_vector_bits__("; + if (T->getSizeExpr()) { + T->getSizeExpr()->printPretty(OS, nullptr, Policy); + if (T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + // Predicates take a bit per byte of the vector size, multiply by 8 to + // get the number of bits passed to the attribute. + OS << " * 8"; + OS << " * sizeof("; + print(T->getElementType(), OS, StringRef()); + // Multiply by 8 for the number of bits. + OS << ") * 8"; + } + OS << "))) "; + printBefore(T->getElementType(), OS); } } @@ -1632,9 +1668,6 @@ case attr::ArmMveStrictPolymorphism: OS << "__clang_arm_mve_strict_polymorphism"; break; - case attr::ArmSveVectorBits: - OS << "arm_sve_vector_bits"; - break; } OS << "))"; } Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1119,12 +1119,13 @@ /// Create a temporary allocation for the purposes of coercion. static Address CreateTempAllocaForCoercion(CodeGenFunction &CGF, llvm::Type *Ty, - CharUnits MinAlign) { + CharUnits MinAlign, + const Twine &Name = "tmp") { // Don't use an alignment that's worse than what LLVM would prefer. auto PrefAlign = CGF.CGM.getDataLayout().getPrefTypeAlignment(Ty); CharUnits Align = std::max(MinAlign, CharUnits::fromQuantity(PrefAlign)); - return CGF.CreateTempAlloca(Ty, Align); + return CGF.CreateTempAlloca(Ty, Align, Name + ".coerce"); } /// EnterStructPointerForCoercedAccess - Given a struct pointer that we are @@ -1230,14 +1231,15 @@ if (SrcTy == Ty) return CGF.Builder.CreateLoad(Src); - uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty); + llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(Ty); if (llvm::StructType *SrcSTy = dyn_cast(SrcTy)) { - Src = EnterStructPointerForCoercedAccess(Src, SrcSTy, DstSize, CGF); + Src = EnterStructPointerForCoercedAccess(Src, SrcSTy, + DstSize.getKnownMinSize(), CGF); SrcTy = Src.getElementType(); } - uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy); + llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy); // If the source and destination are integer or pointer types, just do an // extension or truncation to the desired type. @@ -1248,7 +1250,8 @@ } // If load is legal, just bitcast the src pointer. - if (SrcSize >= DstSize) { + if ((!SrcSize.isScalable() && !DstSize.isScalable()) && + SrcSize.getKnownMinSize() >= DstSize.getKnownMinSize()) { // Generally SrcSize is never greater than DstSize, since this means we are // losing bits. However, this can happen in cases where the structure has // additional padding, for example due to a user specified alignment. @@ -1261,10 +1264,12 @@ } // Otherwise do coercion through memory. This is stupid, but simple. 
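CreateCoercedLoad/CreateCoercedStore now query llvm::TypeSize instead of uint64_t so that scalable sizes are never silently compared as plain integers. A small illustrative sketch of the distinction (standalone example, not from the patch):

#include "llvm/Support/TypeSize.h"

void typeSizeSketch() {
  llvm::TypeSize FixedSz = llvm::TypeSize::Fixed(64);       // alloc size of <16 x i32>, in bytes
  llvm::TypeSize ScalableSz = llvm::TypeSize::Scalable(64); // alloc size of <vscale x 16 x i32>
  (void)FixedSz.isScalable();          // false
  (void)ScalableSz.isScalable();       // true
  (void)ScalableSz.getKnownMinSize();  // 64: a lower bound, not the runtime size
  // This is why the direct-bitcast fast path in CreateCoercedLoad now requires
  // both sizes to be fixed, falling back to the memcpy-based coercion with
  // getKnownMinSize() as the copy size otherwise.
}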
- Address Tmp = CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment()); - CGF.Builder.CreateMemCpy(Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), - Src.getPointer(), Src.getAlignment().getAsAlign(), - llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize)); + Address Tmp = + CreateTempAllocaForCoercion(CGF, Ty, Src.getAlignment(), Src.getName()); + CGF.Builder.CreateMemCpy( + Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), Src.getPointer(), + Src.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, SrcSize.getKnownMinSize())); return CGF.Builder.CreateLoad(Tmp); } @@ -1303,10 +1308,11 @@ return; } - uint64_t SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy); + llvm::TypeSize SrcSize = CGF.CGM.getDataLayout().getTypeAllocSize(SrcTy); if (llvm::StructType *DstSTy = dyn_cast(DstTy)) { - Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy, SrcSize, CGF); + Dst = EnterStructPointerForCoercedAccess(Dst, DstSTy, + SrcSize.getKnownMinSize(), CGF); DstTy = Dst.getElementType(); } @@ -1328,10 +1334,13 @@ return; } - uint64_t DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy); + llvm::TypeSize DstSize = CGF.CGM.getDataLayout().getTypeAllocSize(DstTy); // If store is legal, just bitcast the src pointer. - if (SrcSize <= DstSize) { + // FIXME: does this check for scalable vectors need to be more conservative? + if (SrcSize.getKnownMinSize() <= DstSize.getKnownMinSize() || + (isa(SrcTy) || + isa(DstTy))) { Dst = CGF.Builder.CreateElementBitCast(Dst, SrcTy); CGF.EmitAggregateStore(Src, Dst, DstIsVolatile); } else { @@ -1346,9 +1355,10 @@ // to that information. Address Tmp = CreateTempAllocaForCoercion(CGF, SrcTy, Dst.getAlignment()); CGF.Builder.CreateStore(Src, Tmp); - CGF.Builder.CreateMemCpy(Dst.getPointer(), Dst.getAlignment().getAsAlign(), - Tmp.getPointer(), Tmp.getAlignment().getAsAlign(), - llvm::ConstantInt::get(CGF.IntPtrTy, DstSize)); + CGF.Builder.CreateMemCpy( + Dst.getPointer(), Dst.getAlignment().getAsAlign(), Tmp.getPointer(), + Tmp.getAlignment().getAsAlign(), + llvm::ConstantInt::get(CGF.IntPtrTy, DstSize.getKnownMinSize())); } } Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -2079,6 +2079,34 @@ } } + // Perform VLAT <-> VLST bitcast through memory. + if ((isa(SrcTy) && + isa(DstTy)) || + (isa(SrcTy) && + isa(DstTy))) { + if (const CallExpr *CE = dyn_cast(E)) { + // Call expressions can't have a scalar return unless the return type + // is a reference type so an lvalue can't be emitted. Create a temp + // alloca to store the call, bitcast the address then load. 
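The new CK_BitCast handling converts between sizeless (VLAT) and fixed-length (VLST) values through memory rather than with a direct IR bitcast. A usage sketch of code that reaches this path, assuming -msve-vector-bits=512 and mirroring the attr-arm-sve-vector-bits-call.c test added below:

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

fixed_int32_t to_fixed(svint32_t x) { return x; }    // VLAT -> VLST
svint32_t to_sizeless(fixed_int32_t x) { return x; } // VLST -> VLAT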
+ QualType RetTy = CE->getCallReturnType(CGF.getContext()); + Address Addr = + CGF.CreateDefaultAlignTempAlloca(SrcTy, "saved-call-rvalue"); + LValue LV = CGF.MakeAddrLValue(Addr, RetTy); + CGF.EmitStoreOfScalar(Src, LV); + Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy), + "castFixedSve"); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, CE->getExprLoc()); + } + + Address Addr = EmitLValue(E).getAddress(CGF); + Addr = Builder.CreateElementBitCast(Addr, CGF.ConvertTypeForMem(DestTy)); + LValue DestLV = CGF.MakeAddrLValue(Addr, DestTy); + DestLV.setTBAAInfo(TBAAAccessInfo::getMayAliasInfo()); + return EmitLoadOfLValue(DestLV, CE->getExprLoc()); + } + return Builder.CreateBitCast(Src, DstTy); } case CK_AddressSpaceConversion: { Index: clang/lib/CodeGen/TargetInfo.cpp =================================================================== --- clang/lib/CodeGen/TargetInfo.cpp +++ clang/lib/CodeGen/TargetInfo.cpp @@ -5448,6 +5448,7 @@ ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const; ABIArgInfo classifyArgumentType(QualType RetTy) const; + ABIArgInfo coerceIllegalVector(QualType Ty) const; bool isHomogeneousAggregateBaseType(QualType Ty) const override; bool isHomogeneousAggregateSmallEnough(const Type *Ty, uint64_t Members) const override; @@ -5581,33 +5582,96 @@ } } +ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType Ty) const { + assert(Ty->isVectorType() && "expected vector type!"); + + const auto *VT = Ty->castAs(); + if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + assert(VT->getElementType()->castAs()->getKind() == + BuiltinType::UChar && + "unexpected builtin type for SVE predicate!"); + return ABIArgInfo::getDirect(llvm::ScalableVectorType::get( + llvm::Type::getInt1Ty(getVMContext()), 16)); + } + + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector) { + assert(VT->getElementType()->isBuiltinType() && "expected builtin type!"); + + const auto *BT = VT->getElementType()->castAs(); + llvm::ScalableVectorType *ResType = nullptr; + switch (BT->getKind()) { + default: + llvm_unreachable("unexpected builtin type for SVE vector!"); + case BuiltinType::SChar: + case BuiltinType::UChar: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt8Ty(getVMContext()), 16); + break; + case BuiltinType::Short: + case BuiltinType::UShort: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt16Ty(getVMContext()), 8); + break; + case BuiltinType::Int: + case BuiltinType::UInt: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt32Ty(getVMContext()), 4); + break; + case BuiltinType::Long: + case BuiltinType::ULong: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getInt64Ty(getVMContext()), 2); + break; + case BuiltinType::Float16: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getHalfTy(getVMContext()), 8); + break; + case BuiltinType::Float: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getFloatTy(getVMContext()), 4); + break; + case BuiltinType::Double: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getDoubleTy(getVMContext()), 2); + break; + case BuiltinType::BFloat16: + ResType = llvm::ScalableVectorType::get( + llvm::Type::getBFloatTy(getVMContext()), 8); + break; + } + return ABIArgInfo::getDirect(ResType); + } + + uint64_t Size = getContext().getTypeSize(Ty); + // Android promotes <2 
x i8> to i16, not i32 + if (isAndroid() && (Size <= 16)) { + llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size <= 32) { + llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 64) { + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); + return ABIArgInfo::getDirect(ResType); + } + if (Size == 128) { + auto *ResType = + llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); + return ABIArgInfo::getDirect(ResType); + } + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const { Ty = useFirstFieldIfTransparentUnion(Ty); // Handle illegal vector types here. - if (isIllegalVectorType(Ty)) { - uint64_t Size = getContext().getTypeSize(Ty); - // Android promotes <2 x i8> to i16, not i32 - if (isAndroid() && (Size <= 16)) { - llvm::Type *ResType = llvm::Type::getInt16Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size <= 32) { - llvm::Type *ResType = llvm::Type::getInt32Ty(getVMContext()); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 64) { - auto *ResType = - llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 2); - return ABIArgInfo::getDirect(ResType); - } - if (Size == 128) { - auto *ResType = - llvm::FixedVectorType::get(llvm::Type::getInt32Ty(getVMContext()), 4); - return ABIArgInfo::getDirect(ResType); - } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); - } + if (isIllegalVectorType(Ty)) + return coerceIllegalVector(Ty); if (!isAggregateTypeForABI(Ty)) { // Treat an enum type as its underlying type. @@ -5686,6 +5750,12 @@ if (RetTy->isVoidType()) return ABIArgInfo::getIgnore(); + if (const auto *VT = RetTy->getAs()) { + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || + VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + return coerceIllegalVector(RetTy); + } + // Large vector types should be returned via memory. if (RetTy->isVectorType() && getContext().getTypeSize(RetTy) > 128) return getNaturalAlignIndirect(RetTy); @@ -5741,6 +5811,13 @@ /// isIllegalVectorType - check whether the vector type is legal for AArch64. bool AArch64ABIInfo::isIllegalVectorType(QualType Ty) const { if (const VectorType *VT = Ty->getAs()) { + // Check whether VT is a fixed-length SVE vector. These types are + // represented as scalable vectors in function args/return and must be + // coerced from fixed vectors. + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector || + VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector) + return true; + // Check whether VT is legal. 
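At the ABI boundary the new coerceIllegalVector hook passes and returns each fixed-length type as the scalable container type it occupies in registers. A sketch of the resulting IR signatures, assuming -msve-vector-bits=512 (extern "C" used only to keep the symbol names readable):

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));
typedef svbool_t  fixed_bool_t  __attribute__((arm_sve_vector_bits(512)));

extern "C" fixed_int32_t f(fixed_int32_t x); // IR: <vscale x 4 x i32> @f(<vscale x 4 x i32>)
extern "C" fixed_bool_t  g(fixed_bool_t x);  // IR: <vscale x 16 x i1> @g(<vscale x 16 x i1>)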
unsigned NumElements = VT->getNumElements(); uint64_t Size = getContext().getTypeSize(VT); Index: clang/lib/Sema/SemaDecl.cpp =================================================================== --- clang/lib/Sema/SemaDecl.cpp +++ clang/lib/Sema/SemaDecl.cpp @@ -8004,7 +8004,7 @@ return; } - if (!NewVD->hasLocalStorage() && T->isSizelessType() && !T->isVLST()) { + if (!NewVD->hasLocalStorage() && T->isSizelessType()) { Diag(NewVD->getLocation(), diag::err_sizeless_nonlocal) << T; NewVD->setInvalidDecl(); return; Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -8896,6 +8896,33 @@ return false; } +/// This helper function returns true if LHSType is an SVE builtin type and +/// RHSType is a valid fixed-length representation of LHSType, and vice versa. +static bool areCompatibleSveTypes(QualType LHSType, QualType RHSType, + ASTContext &Context) { + auto IsValidCast = [](QualType LHSType, QualType RHSType, + ASTContext &Context) { + if (const auto *BT = LHSType->getAs()) { + if (const auto *VT = RHSType->getAs()) { + // Predicates have the same representation as uint8 so we also have to + // check the kind to make these types incompatible. + if (BT->getKind() == BuiltinType::SveBool && + VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector && + isVector(RHSType, LHSType->getFixedLengthSveEltType(Context))) + return true; + + if (VT->getVectorKind() == VectorType::SveFixedLengthDataVector && + isVector(RHSType, LHSType->getFixedLengthSveEltType(Context))) + return true; + } + } + return false; + }; + + return IsValidCast(LHSType, RHSType, Context) || + IsValidCast(RHSType, LHSType, Context); +} + /// CheckAssignmentConstraints (C99 6.5.16) - This routine currently /// has code to accommodate several GCC extensions when type checking /// pointers. Here are some objectionable examples that GCC considers warnings: @@ -9006,6 +9033,15 @@ } } + if ((LHSType->isSizelessBuiltinType() && RHSType->isVectorType()) || + (LHSType->isVectorType() && RHSType->isSizelessBuiltinType())) { + // Allow assignments between fixed-length and sizeless SVE vectors. + if (areCompatibleSveTypes(LHSType, RHSType, Context)) { + Kind = CK_BitCast; + return Compatible; + } + } + return Incompatible; } @@ -9892,6 +9928,22 @@ // Okay, the expression is invalid. + // If there's a sizeless operand, diagnose that. + if ((LHSVecType && + ((LHSVecType->getVectorKind() == VectorType::SveFixedLengthDataVector) || + (LHSVecType->getVectorKind() == + VectorType::SveFixedLengthPredicateVector)) && + RHSType->isSizelessBuiltinType()) || + (RHSVecType && + ((RHSVecType->getVectorKind() == VectorType::SveFixedLengthDataVector) || + (RHSVecType->getVectorKind() == + VectorType::SveFixedLengthPredicateVector)) && + LHSType->isSizelessBuiltinType())) { + Diag(Loc, diag::err_typecheck_vector_not_convertable_sizeless) + << LHSType << RHSType; + return QualType(); + } + // If there's a non-vector, non-real operand, diagnose that. 
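Taken together, the Sema changes accept implicit conversions between a fixed-length type and its own sizeless counterpart, while mixing the two as operands of a vector binary operator is expected to hit the new err_typecheck_vector_not_convertable_sizeless diagnostic. A sketch, assuming -msve-vector-bits=512:

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

void assign(fixed_int32_t f, svint32_t s) {
  f = s; // OK: Compatible with CK_BitCast via areCompatibleSveTypes
  s = f; // OK: the check is symmetric
  // f + s is expected to be rejected with the new diagnostic:
  //   cannot convert between fixed-length and sizeless vector (...)
}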
if ((!RHSVecType && !RHSType->isRealType()) || (!LHSVecType && !LHSType->isRealType())) { Index: clang/lib/Sema/SemaType.cpp =================================================================== --- clang/lib/Sema/SemaType.cpp +++ clang/lib/Sema/SemaType.cpp @@ -2304,7 +2304,7 @@ return QualType(); } - if (T->isSizelessType() && !T->isVLST()) { + if (T->isSizelessType()) { Diag(Loc, diag::err_array_incomplete_or_sizeless_type) << 1 << T; return QualType(); } @@ -7754,14 +7754,10 @@ /// HandleArmSveVectorBitsTypeAttr - The "arm_sve_vector_bits" attribute is /// used to create fixed-length versions of sizeless SVE types defined by /// the ACLE, such as svint32_t and svbool_t. -static void HandleArmSveVectorBitsTypeAttr(TypeProcessingState &State, - QualType &CurType, - ParsedAttr &Attr) { - Sema &S = State.getSema(); - ASTContext &Ctx = S.Context; - +static void HandleArmSveVectorBitsTypeAttr(QualType &CurType, ParsedAttr &Attr, + Sema &S) { // Target must have SVE. - if (!Ctx.getTargetInfo().hasFeature("sve")) { + if (!S.Context.getTargetInfo().hasFeature("sve")) { S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) << Attr; Attr.setInvalid(); return; @@ -7806,8 +7802,18 @@ return; } - auto *A = ::new (Ctx) ArmSveVectorBitsAttr(Ctx, Attr, VecSize); - CurType = State.getAttributedType(A, CurType, CurType); + const auto *BT = CurType->castAs(); + + QualType EltType = CurType->getFixedLengthSveEltType(S.Context); + unsigned TypeSize = S.Context.getTypeSize(EltType); + VectorType::VectorKind VecKind = VectorType::SveFixedLengthDataVector; + if (BT->getKind() == BuiltinType::SveBool) { + // Predicates are represented as i8 + VecSize /= S.Context.getCharWidth() * S.Context.getCharWidth(); + VecKind = VectorType::SveFixedLengthPredicateVector; + } else + VecSize /= TypeSize; + CurType = S.Context.getVectorType(EltType, VecSize, VecKind); } static void HandleArmMveStrictPolymorphismAttr(TypeProcessingState &State, @@ -8074,7 +8080,7 @@ attr.setUsedAsTypeAttr(); break; case ParsedAttr::AT_ArmSveVectorBits: - HandleArmSveVectorBitsTypeAttr(state, type, attr); + HandleArmSveVectorBitsTypeAttr(type, attr, state.getSema()); attr.setUsedAsTypeAttr(); break; case ParsedAttr::AT_ArmMveStrictPolymorphism: { Index: clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-bitcast.c @@ -0,0 +1,278 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=128 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=256 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); +typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); +typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); +typedef svbool_t fixed_bool_t 
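For reference, the element counts computed by HandleArmSveVectorBitsTypeAttr reduce to the following (hypothetical helper names, shown only to restate the handler's arithmetic):

// N is the attribute argument in bits.
constexpr unsigned sveDataVectorElts(unsigned NBits, unsigned EltBits) {
  return NBits / EltBits;       // e.g. 512 / 32 -> 16 elements for svint32_t
}
constexpr unsigned svePredicateElts(unsigned NBits) {
  return NBits / (8 * 8);       // one bit per data byte, stored as i8: 512 -> 8
}
static_assert(sveDataVectorElts(512, 32) == 16, "matches the <16 x i32> in the tests");
static_assert(svePredicateElts(512) == 8, "matches the <8 x i8> in the tests");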
__attribute__((arm_sve_vector_bits(N))); + +#define DEFINE_STRUCT(ty) \ + struct struct_##ty { \ + fixed_##ty##_t x, y[3]; \ + } struct_##ty; + +DEFINE_STRUCT(int64) +DEFINE_STRUCT(float64) +DEFINE_STRUCT(bfloat16) +DEFINE_STRUCT(bool) + +//===----------------------------------------------------------------------===// +// int64 +//===----------------------------------------------------------------------===// + +// CHECK-128-LABEL: @read_int64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i64>* [[ARRAYIDX]] to * +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret [[TMP1]] +// +// CHECK-256-LABEL: @read_int64( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i64>* [[ARRAYIDX]] to * +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret [[TMP1]] +// +// CHECK-512-LABEL: @read_int64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i64>* [[ARRAYIDX]] to * +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret [[TMP1]] +// +svint64_t read_int64(struct struct_int64 *s) { + return s->y[0]; +} + +// CHECK-128-LABEL: @write_int64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i64>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_int64( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x i64>* +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: store <4 x i64> [[TMP1]], <4 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret void +// +// CHECK-512-LABEL: @write_int64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x i64>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64:%.*]], %struct.struct_int64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret void +// +void write_int64(struct struct_int64 *s, svint64_t x) { + 
s->y[0] = x; +} + +//===----------------------------------------------------------------------===// +// float64 +//===----------------------------------------------------------------------===// + +// CHECK-128-LABEL: @read_float64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x double>* [[ARRAYIDX]] to * +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret [[TMP1]] +// +// CHECK-256-LABEL: @read_float64( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x double>* [[ARRAYIDX]] to * +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret [[TMP1]] +// +// CHECK-512-LABEL: @read_float64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[ARRAYIDX]] to * +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret [[TMP1]] +// +svfloat64_t read_float64(struct struct_float64 *s) { + return s->y[0]; +} + +// CHECK-128-LABEL: @write_float64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x double>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: store <2 x double> [[TMP1]], <2 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_float64( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <4 x double>* +// CHECK-256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: store <4 x double> [[TMP1]], <4 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret void +// +// CHECK-512-LABEL: @write_float64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !7 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x double>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64:%.*]], %struct.struct_float64* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret void +// +void write_float64(struct struct_float64 *s, svfloat64_t x) { + s->y[0] = x; +} + +//===----------------------------------------------------------------------===// +// bfloat16 
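Because the fixed-length types are complete, fixed-size object types, they are usable in contexts that reject the sizeless ACLE types; the struct reads and writes tested in this file depend on that. A brief sketch, assuming -msve-vector-bits=512:

#include <arm_sve.h>

typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(512)));

fixed_int32_t global_vec;             // OK: non-local storage is allowed
struct S { fixed_int32_t x, y[3]; };  // OK: members and arrays are allowed
// svint32_t bad_global;              // error: sizeless types still cannot have
//                                    // non-local storage (err_sizeless_nonlocal)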
+//===----------------------------------------------------------------------===// + +// CHECK-128-LABEL: @read_bfloat16( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <8 x bfloat>* [[ARRAYIDX]] to * +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret [[TMP1]] +// +// CHECK-256-LABEL: @read_bfloat16( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <16 x bfloat>* [[ARRAYIDX]] to * +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret [[TMP1]] +// +// CHECK-512-LABEL: @read_bfloat16( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <32 x bfloat>* [[ARRAYIDX]] to * +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret [[TMP1]] +// +svbfloat16_t read_bfloat16(struct struct_bfloat16 *s) { + return s->y[0]; +} + +// CHECK-128-LABEL: @write_bfloat16( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <8 x bfloat>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_bfloat16( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x bfloat>* +// CHECK-256-NEXT: [[TMP1:%.*]] = load <16 x bfloat>, <16 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: store <16 x bfloat> [[TMP1]], <16 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-256-NEXT: ret void +// +// CHECK-512-LABEL: @write_bfloat16( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !9 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <32 x bfloat>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BFLOAT16:%.*]], %struct.struct_bfloat16* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* [[ARRAYIDX]], align 16, !tbaa !2 +// CHECK-512-NEXT: ret void +// +void write_bfloat16(struct struct_bfloat16 *s, svbfloat16_t x) { + s->y[0] = x; +} + +//===----------------------------------------------------------------------===// +// bool +//===----------------------------------------------------------------------===// + +// 
CHECK-128-LABEL: @read_bool( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast <2 x i8>* [[ARRAYIDX]] to * +// CHECK-128-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-128-NEXT: ret [[TMP1]] +// +// CHECK-256-LABEL: @read_bool( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast <4 x i8>* [[ARRAYIDX]] to * +// CHECK-256-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-256-NEXT: ret [[TMP1]] +// +// CHECK-512-LABEL: @read_bool( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[ARRAYIDX]] to * +// CHECK-512-NEXT: [[TMP1:%.*]] = load , * [[TMP0]], align 2, !tbaa !2 +// CHECK-512-NEXT: ret [[TMP1]] +// +svbool_t read_bool(struct struct_bool *s) { + return s->y[0]; +} + +// CHECK-128-LABEL: @write_bool( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <2 x i8>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1, i64 0 +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* [[ARRAYIDX]], align 2, !tbaa !2 +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_bool( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-256-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-256-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i32* +// CHECK-256-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 16, !tbaa !2 +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: [[TMP2:%.*]] = bitcast [3 x <4 x i8>]* [[Y]] to i32* +// CHECK-256-NEXT: store i32 [[TMP1]], i32* [[TMP2]], align 2, !tbaa !2 +// CHECK-256-NEXT: ret void +// +// CHECK-512-LABEL: @write_bool( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !11 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to i64* +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_BOOL:%.*]], %struct.struct_bool* [[S:%.*]], i64 0, i32 1 +// CHECK-512-NEXT: [[TMP2:%.*]] = bitcast [3 x <8 x i8>]* [[Y]] to i64* +// CHECK-512-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 2, !tbaa !2 +// CHECK-512-NEXT: ret void +// +void write_bool(struct struct_bool *s, svbool_t x) { + s->y[0] = x; +} Index: clang/test/CodeGen/attr-arm-sve-vector-bits-call.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-call.c @@ -0,0 +1,337 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu 
-target-feature +sve -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); +typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); +typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); + +//===----------------------------------------------------------------------===// +// Test caller/callee with VLST <-> VLAT +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @sizeless_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X:%.*]] +// +svint32_t sizeless_callee(svint32_t x) { + return x; +} + +// CHECK-LABEL: @fixed_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * +// CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[X_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[X_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: store [[TMP2]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* +// CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP3]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP4]] +// +fixed_int32_t fixed_caller(fixed_int32_t x) { + return sizeless_callee(x); +} + +// CHECK-LABEL: @fixed_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[X]] to * +// CHECK-NEXT: store [[X_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[X1:%.*]] = load <16 x i32>, <16 x i32>* [[X]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[X1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP1]] +// +fixed_int32_t fixed_callee(fixed_int32_t x) { + return x; +} + +// CHECK-LABEL: @sizeless_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: [[COERCE_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[COERCE1:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca <16 x i32>, align 64 +// CHECK-NEXT: store [[X:%.*]], * [[X_ADDR]], align 16, !tbaa !5 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[X_ADDR]] to <16 x i32>* +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[COERCE_0__SROA_CAST:%.*]] = bitcast * [[COERCE_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[COERCE_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = 
load , * [[COERCE_COERCE]], align 16 +// CHECK-NEXT: [[CALL:%.*]] = call @fixed_callee( [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32>* [[COERCE1]] to * +// CHECK-NEXT: store [[CALL]], * [[TMP3]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, <16 x i32>* [[COERCE1]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[TMP4]], <16 x i32>* [[SAVED_CALL_RVALUE]], align 64, !tbaa !2 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast <16 x i32>* [[SAVED_CALL_RVALUE]] to * +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[CASTFIXEDSVE]], align 64, !tbaa !2 +// CHECK-NEXT: ret [[TMP5]] +// +svint32_t sizeless_caller(svint32_t x) { + return fixed_callee(x); +} + +//===----------------------------------------------------------------------===// +// fixed, fixed +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[OP2:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[OP2_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP2]] to * +// CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 +// CHECK-NEXT: [[OP22:%.*]] = load <16 x i32>, <16 x i32>* [[OP2]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[OP22]], <16 x i32>* [[OP2_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32>* [[OP2_ADDR]] to * +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP6]], [[TMP3]], [[TMP5]]) +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* +// CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP8]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP9]] +// +fixed_int32_t call_int32_ff(svbool_t pg, fixed_int32_t op1, fixed_int32_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_float64_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[OP2:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[OP2_ADDR:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], 
align 16 +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP2]] to * +// CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP1]], align 16 +// CHECK-NEXT: [[OP22:%.*]] = load <8 x double>, <8 x double>* [[OP2]], align 16, !tbaa !2 +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: store <8 x double> [[OP22]], <8 x double>* [[OP2_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double>* [[OP2_ADDR]] to * +// CHECK-NEXT: [[TMP5:%.*]] = load , * [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP6:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP7:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP6]], [[TMP3]], [[TMP5]]) +// CHECK-NEXT: store [[TMP7]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* +// CHECK-NEXT: [[TMP8:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* +// CHECK-NEXT: store <8 x double> [[TMP8]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP9]] +// +fixed_float64_t call_float64_ff(svbool_t pg, fixed_float64_t op1, fixed_float64_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_bool_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[OP2:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[OP2_ADDR:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* +// CHECK-NEXT: [[OP113:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP2]] to * +// CHECK-NEXT: store [[OP2_COERCE:%.*]], * [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP2]] to i64* +// CHECK-NEXT: [[OP224:%.*]] = load i64, i64* [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* +// CHECK-NEXT: store i64 [[OP113]], i64* [[TMP4]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to i64* +// CHECK-NEXT: store i64 [[OP224]], i64* [[TMP5]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP7:%.*]] = load , * [[TMP6]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8>* [[OP2_ADDR]] to * +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[TMP8]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP10:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP7]], [[TMP9]]) +// CHECK-NEXT: store [[TMP10]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: [[TMP11:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* +// CHECK-NEXT: [[TMP12:%.*]] = load i64, i64* [[TMP11]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* +// 
CHECK-NEXT: store i64 [[TMP12]], i64* [[TMP13]], align 16 +// CHECK-NEXT: [[TMP14:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP14]] +// +fixed_bool_t call_bool_ff(svbool_t pg, fixed_bool_t op1, fixed_bool_t op2) { + return svsel(pg, op1, op2); +} + +//===----------------------------------------------------------------------===// +// fixed, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[OP11:%.*]] = load <16 x i32>, <16 x i32>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[OP11]], <16 x i32>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP3]], [[TMP2]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* +// CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP5]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP6]] +// +fixed_int32_t call_int32_fs(svbool_t pg, fixed_int32_t op1, svint32_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_float64_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[OP11:%.*]] = load <8 x double>, <8 x double>* [[OP1]], align 16, !tbaa !2 +// CHECK-NEXT: store <8 x double> [[OP11]], <8 x double>* [[OP1_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP4:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP3]], [[TMP2]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP4]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* +// CHECK-NEXT: store <8 x double> [[TMP5]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP6]] +// 
+fixed_float64_t call_float64_fs(svbool_t pg, fixed_float64_t op1, svfloat64_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_bool_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[OP1:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[OP1_ADDR:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[OP1]] to * +// CHECK-NEXT: store [[OP1_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[OP1]] to i64* +// CHECK-NEXT: [[OP112:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to i64* +// CHECK-NEXT: store i64 [[OP112]], i64* [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[OP1_ADDR]] to * +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[TMP4]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP5]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: [[TMP6:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* +// CHECK-NEXT: [[TMP7:%.*]] = load i64, i64* [[TMP6]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* +// CHECK-NEXT: store i64 [[TMP7]], i64* [[TMP8]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP9]] +// +fixed_bool_t call_bool_fs(svbool_t pg, fixed_bool_t op1, svbool_t op2) { + return svsel(pg, op1, op2); +} + +//===----------------------------------------------------------------------===// +// scalable, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv4i32( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !5 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* +// CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP2]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP3]] +// +fixed_int32_t call_int32_ss(svbool_t pg, svint32_t op1, svint32_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_float64_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = call @llvm.aarch64.sve.sel.nxv2f64( [[TMP0]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP1]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !7 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <8 x double>* +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* [[CASTFIXEDSVE]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x 
double>* +// CHECK-NEXT: store <8 x double> [[TMP2]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP3]] +// +fixed_float64_t call_float64_ss(svbool_t pg, svfloat64_t op1, svfloat64_t op2) { + return svsel(pg, op1, op2); +} + +// CHECK-LABEL: @call_bool_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[TMP0:%.*]] = call @llvm.aarch64.sve.sel.nxv16i1( [[PG:%.*]], [[OP1:%.*]], [[OP2:%.*]]) +// CHECK-NEXT: store [[TMP0]], * [[SAVED_CALL_RVALUE]], align 16, !tbaa !9 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to i64* +// CHECK-NEXT: [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* +// CHECK-NEXT: store i64 [[TMP2]], i64* [[TMP3]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP4]] +// +fixed_bool_t call_bool_ss(svbool_t pg, svbool_t op1, svbool_t op2) { + return svsel(pg, op1, op2); +} Index: clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-cast.c @@ -0,0 +1,109 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); +typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); +typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); + +// CHECK-LABEL: @to_svint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <16 x i32>* [[TYPE]] to * +// CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TYPE1:%.*]] = load <16 x i32>, <16 x i32>* [[TYPE]], align 16, !tbaa !2 +// CHECK-NEXT: store <16 x i32> [[TYPE1]], <16 x i32>* [[TYPE_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i32>* [[TYPE_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: ret [[TMP2]] +// +svint32_t to_svint32_t(fixed_int32_t type) { + return type; +} + +// CHECK-LABEL: @from_svint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !5 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <16 x i32>* +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <16 x i32>* +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP2]] +// +fixed_int32_t from_svint32_t(svint32_t type) { + return type; +} + +// CHECK-LABEL: @to_svfloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: 
[[TYPE_ADDR:%.*]] = alloca <8 x double>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x double>* [[TYPE]] to * +// CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TYPE1:%.*]] = load <8 x double>, <8 x double>* [[TYPE]], align 16, !tbaa !2 +// CHECK-NEXT: store <8 x double> [[TYPE1]], <8 x double>* [[TYPE_ADDR]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x double>* [[TYPE_ADDR]] to * +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: ret [[TMP2]] +// +svfloat64_t to_svfloat64_t(fixed_float64_t type) { + return type; +} + +// CHECK-LABEL: @from_svfloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !7 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to <8 x double>* +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[RETVAL_0__SROA_CAST:%.*]] = bitcast * [[RETVAL_COERCE]] to <8 x double>* +// CHECK-NEXT: store <8 x double> [[TMP1]], <8 x double>* [[RETVAL_0__SROA_CAST]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP2]] +// +fixed_float64_t from_svfloat64_t(svfloat64_t type) { + return type; +} + +// CHECK-LABEL: @to_svbool_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca <8 x i8>, align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i8>* [[TYPE]] to * +// CHECK-NEXT: store [[TYPE_COERCE:%.*]], * [[TMP0]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8>* [[TYPE]] to i64* +// CHECK-NEXT: [[TYPE12:%.*]] = load i64, i64* [[TMP1]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to i64* +// CHECK-NEXT: store i64 [[TYPE12]], i64* [[TMP2]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[TYPE_ADDR]] to * +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[TMP3]], align 16, !tbaa !2 +// CHECK-NEXT: ret [[TMP4]] +// +svbool_t to_svbool_t(fixed_bool_t type) { + return type; +} + +// CHECK-LABEL: @from_svbool_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store [[TYPE:%.*]], * [[TYPE_ADDR]], align 16, !tbaa !9 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast * [[TYPE_ADDR]] to i64* +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i64* +// CHECK-NEXT: store i64 [[TMP1]], i64* [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP3]] +// +fixed_bool_t from_svbool_t(svbool_t type) { + return type; +} Index: clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-codegen.c @@ -0,0 +1,117 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); +typedef svbool_t fixed_bool_t 
__attribute__((arm_sve_vector_bits(N))); + +fixed_bool_t global_pred; +fixed_int32_t global_vec; + +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[PRED_ADDR:%.*]] = alloca , align 2 +// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca , align 16 +// CHECK-NEXT: [[PG:%.*]] = alloca , align 2 +// CHECK-NEXT: [[SAVED_CALL_RVALUE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store [[PRED:%.*]], * [[PRED_ADDR]], align 2 +// CHECK-NEXT: store [[VEC:%.*]], * [[VEC_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load , * [[PRED_ADDR]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load , * bitcast (<8 x i8>* @global_pred to *), align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* @global_pred, align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load , * bitcast (<8 x i8>* @global_pred to *), align 2 +// CHECK-NEXT: [[TMP5:%.*]] = call @llvm.aarch64.sve.and.z.nxv16i1( [[TMP0]], [[TMP2]], [[TMP4]]) +// CHECK-NEXT: store [[TMP5]], * [[PG]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load , * [[PG]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load <16 x i32>, <16 x i32>* @global_vec, align 16 +// CHECK-NEXT: [[TMP8:%.*]] = load , * bitcast (<16 x i32>* @global_vec to *), align 16 +// CHECK-NEXT: [[TMP9:%.*]] = load , * [[VEC_ADDR]], align 16 +// CHECK-NEXT: [[TMP10:%.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[TMP6]]) +// CHECK-NEXT: [[TMP11:%.*]] = call @llvm.aarch64.sve.add.nxv4i32( [[TMP10]], [[TMP8]], [[TMP9]]) +// CHECK-NEXT: store [[TMP11]], * [[SAVED_CALL_RVALUE]], align 16 +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = bitcast * [[SAVED_CALL_RVALUE]] to <16 x i32>* +// CHECK-NEXT: [[TMP12:%.*]] = load <16 x i32>, <16 x i32>* [[CASTFIXEDSVE]], align 16 +// CHECK-NEXT: store <16 x i32> [[TMP12]], <16 x i32>* [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast * [[RETVAL_COERCE]] to i8* +// CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP13]], i8* align 16 [[TMP14]], i64 64, i1 false) +// CHECK-NEXT: [[TMP15:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP15]] +// +fixed_int32_t foo(svbool_t pred, svint32_t vec) { + svbool_t pg = svand_z(pred, global_pred, global_pred); + return svadd_m(pg, global_vec, vec); +} + +// CHECK-LABEL: @test_ptr_to_global( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[GLOBAL_VEC_PTR:%.*]] = alloca <16 x i32>*, align 8 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store <16 x i32>* @global_vec, <16 x i32>** [[GLOBAL_VEC_PTR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>*, <16 x i32>** [[GLOBAL_VEC_PTR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[TMP0]], align 16 +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP4]] +// +fixed_int32_t test_ptr_to_global() { + fixed_int32_t *global_vec_ptr; + global_vec_ptr = &global_vec; + return *global_vec_ptr; +} + +// +// Test casting pointer from 
fixed-length array to scalable vector. +// CHECK-LABEL: @array_arg( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <16 x i32>, align 16 +// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca <16 x i32>*, align 8 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: store <16 x i32>* [[ARR:%.*]], <16 x i32>** [[ARR_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <16 x i32>*, <16 x i32>** [[ARR_ADDR]], align 8 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <16 x i32>, <16 x i32>* [[TMP0]], i64 0 +// CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, <16 x i32>* [[ARRAYIDX]], align 16 +// CHECK-NEXT: store <16 x i32> [[TMP1]], <16 x i32>* [[RETVAL]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32>* [[RETVAL]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 16 [[TMP3]], i64 64, i1 false) +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP4]] +// +fixed_int32_t array_arg(fixed_int32_t arr[]) { + return arr[0]; +} + +// CHECK-LABEL: @address_of_array_idx( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i8>, align 2 +// CHECK-NEXT: [[ARR:%.*]] = alloca [3 x <8 x i8>], align 2 +// CHECK-NEXT: [[PARR:%.*]] = alloca <8 x i8>*, align 8 +// CHECK-NEXT: [[RETVAL_COERCE:%.*]] = alloca , align 16 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[ARR]], i64 0, i64 0 +// CHECK-NEXT: store <8 x i8>* [[ARRAYIDX]], <8 x i8>** [[PARR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>*, <8 x i8>** [[PARR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP0]], align 2 +// CHECK-NEXT: store <8 x i8> [[TMP1]], <8 x i8>* [[RETVAL]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = bitcast * [[RETVAL_COERCE]] to i8* +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8>* [[RETVAL]] to i8* +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP2]], i8* align 2 [[TMP3]], i64 8, i1 false) +// CHECK-NEXT: [[TMP4:%.*]] = load , * [[RETVAL_COERCE]], align 16 +// CHECK-NEXT: ret [[TMP4]] +// +fixed_bool_t address_of_array_idx() { + fixed_bool_t arr[3]; + fixed_bool_t *parr; + parr = &arr[0]; + return *parr; +} Index: clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-globals.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=128 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); +typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); +typedef svbool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N))); + +fixed_int64_t global_i64; +fixed_bfloat16_t global_bf16; +fixed_bool_t global_bool; + +//===----------------------------------------------------------------------===// +// WRITES 
+//===----------------------------------------------------------------------===// + +// CHECK-128-LABEL: @write_global_i64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !2 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i64>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* [[TMP0]], align 16, !tbaa !6 +// CHECK-128-NEXT: store <2 x i64> [[TMP1]], <2 x i64>* @global_i64, align 16, !tbaa !6 +// CHECK-128-NEXT: ret void +// +// CHECK-512-LABEL: @write_global_i64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !2 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x i64>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <8 x i64>, <8 x i64>* [[TMP0]], align 16, !tbaa !6 +// CHECK-512-NEXT: store <8 x i64> [[TMP1]], <8 x i64>* @global_i64, align 16, !tbaa !6 +// CHECK-512-NEXT: ret void +// +void write_global_i64(svint64_t v) { global_i64 = v; } + +// CHECK-128-LABEL: @write_global_bf16( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !7 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <8 x bfloat>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, <8 x bfloat>* [[TMP0]], align 16, !tbaa !6 +// CHECK-128-NEXT: store <8 x bfloat> [[TMP1]], <8 x bfloat>* @global_bf16, align 16, !tbaa !6 +// CHECK-128-NEXT: ret void +// +// CHECK-512-LABEL: @write_global_bf16( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !7 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <32 x bfloat>* +// CHECK-512-NEXT: [[TMP1:%.*]] = load <32 x bfloat>, <32 x bfloat>* [[TMP0]], align 16, !tbaa !6 +// CHECK-512-NEXT: store <32 x bfloat> [[TMP1]], <32 x bfloat>* @global_bf16, align 16, !tbaa !6 +// CHECK-512-NEXT: ret void +// +void write_global_bf16(svbfloat16_t v) { global_bf16 = v; } + +// CHECK-128-LABEL: @write_global_bool( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-128-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !9 +// CHECK-128-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to <2 x i8>* +// CHECK-128-NEXT: [[TMP1:%.*]] = load <2 x i8>, <2 x i8>* [[TMP0]], align 16, !tbaa !6 +// CHECK-128-NEXT: store <2 x i8> [[TMP1]], <2 x i8>* @global_bool, align 2, !tbaa !6 +// CHECK-128-NEXT: ret void +// +// CHECK-512-LABEL: @write_global_bool( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[V_ADDR:%.*]] = alloca , align 16 +// CHECK-512-NEXT: store [[V:%.*]], * [[V_ADDR]], align 16, !tbaa !9 +// CHECK-512-NEXT: [[TMP0:%.*]] = bitcast * [[V_ADDR]] to i64* +// CHECK-512-NEXT: [[TMP1:%.*]] = load i64, i64* [[TMP0]], align 16, !tbaa !6 +// CHECK-512-NEXT: store i64 [[TMP1]], i64* bitcast (<8 x i8>* @global_bool to i64*), align 2, !tbaa !6 +// CHECK-512-NEXT: ret void +// +void write_global_bool(svbool_t v) { global_bool = v; } + +//===----------------------------------------------------------------------===// +// READS +//===----------------------------------------------------------------------===// + +// CHECK-128-LABEL: @read_global_i64( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i64>* @global_i64 to *), align 16, !tbaa !6 +// CHECK-128-NEXT: ret [[TMP0]] +// +// CHECK-512-LABEL: 
@read_global_i64( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i64>* @global_i64 to *), align 16, !tbaa !6 +// CHECK-512-NEXT: ret [[TMP0]] +// +svint64_t read_global_i64() { return global_i64; } + +// CHECK-128-LABEL: @read_global_bf16( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x bfloat>* @global_bf16 to *), align 16, !tbaa !6 +// CHECK-128-NEXT: ret [[TMP0]] +// +// CHECK-512-LABEL: @read_global_bf16( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<32 x bfloat>* @global_bf16 to *), align 16, !tbaa !6 +// CHECK-512-NEXT: ret [[TMP0]] +// +svbfloat16_t read_global_bf16() { return global_bf16; } + +// CHECK-128-LABEL: @read_global_bool( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[TMP0:%.*]] = load , * bitcast (<2 x i8>* @global_bool to *), align 2, !tbaa !6 +// CHECK-128-NEXT: ret [[TMP0]] +// +// CHECK-512-LABEL: @read_global_bool( +// CHECK-512-NEXT: entry: +// CHECK-512-NEXT: [[TMP0:%.*]] = load , * bitcast (<8 x i8>* @global_bool to *), align 2, !tbaa !6 +// CHECK-512-NEXT: ret [[TMP0]] +// +svbool_t read_global_bool() { return global_bool; } Index: clang/test/CodeGen/attr-arm-sve-vector-bits-types.c =================================================================== --- /dev/null +++ clang/test/CodeGen/attr-arm-sve-vector-bits-types.c @@ -0,0 +1,581 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=128 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=256 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=512 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=1024 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -msve-vector-bits=2048 -fallow-half-arguments-and-returns -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-2048 + +#include + +#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL + +typedef svint8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N))); +typedef svint16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N))); +typedef svint32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N))); +typedef svint64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N))); + +typedef svuint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N))); +typedef svuint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N))); +typedef svuint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N))); +typedef svuint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N))); + +typedef svfloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N))); +typedef svfloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N))); +typedef svfloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N))); + +typedef svbfloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N))); + +typedef svbool_t fixed_bool_t 
__attribute__((arm_sve_vector_bits(N))); + +//===----------------------------------------------------------------------===// +// Structs and unions +//===----------------------------------------------------------------------===// +#define DEFINE_STRUCT(ty) \ + struct struct_##ty { \ + fixed_##ty##_t x; \ + } struct_##ty; + +#define DEFINE_UNION(ty) \ + union union_##ty { \ + fixed_##ty##_t x; \ + } union_##ty; + +DEFINE_STRUCT(int8) +DEFINE_STRUCT(int16) +DEFINE_STRUCT(int32) +DEFINE_STRUCT(int64) +DEFINE_STRUCT(uint8) +DEFINE_STRUCT(uint16) +DEFINE_STRUCT(uint32) +DEFINE_STRUCT(uint64) +DEFINE_STRUCT(float16) +DEFINE_STRUCT(float32) +DEFINE_STRUCT(float64) +DEFINE_STRUCT(bfloat16) +DEFINE_STRUCT(bool) + +DEFINE_UNION(int8) +DEFINE_UNION(int16) +DEFINE_UNION(int32) +DEFINE_UNION(int64) +DEFINE_UNION(uint8) +DEFINE_UNION(uint16) +DEFINE_UNION(uint32) +DEFINE_UNION(uint64) +DEFINE_UNION(float16) +DEFINE_UNION(float32) +DEFINE_UNION(float64) +DEFINE_UNION(bfloat16) +DEFINE_UNION(bool) + +//===----------------------------------------------------------------------===// +// Global variables +//===----------------------------------------------------------------------===// +fixed_int8_t global_i8; +fixed_int16_t global_i16; +fixed_int32_t global_i32; +fixed_int64_t global_i64; + +fixed_uint8_t global_u8; +fixed_uint16_t global_u16; +fixed_uint32_t global_u32; +fixed_uint64_t global_u64; + +fixed_float16_t global_f16; +fixed_float32_t global_f32; +fixed_float64_t global_f64; + +fixed_bfloat16_t global_bf16; + +fixed_bool_t global_bool; + +//===----------------------------------------------------------------------===// +// Global arrays +//===----------------------------------------------------------------------===// +fixed_int8_t global_arr_i8[3]; +fixed_int16_t global_arr_i16[3]; +fixed_int32_t global_arr_i32[3]; +fixed_int64_t global_arr_i64[3]; + +fixed_uint8_t global_arr_u8[3]; +fixed_uint16_t global_arr_u16[3]; +fixed_uint32_t global_arr_u32[3]; +fixed_uint64_t global_arr_u64[3]; + +fixed_float16_t global_arr_f16[3]; +fixed_float32_t global_arr_f32[3]; +fixed_float64_t global_arr_f64[3]; + +fixed_bfloat16_t global_arr_bf16[3]; + +fixed_bool_t global_arr_bool[3]; + +//===----------------------------------------------------------------------===// +// Locals +//===----------------------------------------------------------------------===// +void f() { + // Variables + fixed_int8_t local_i8; + fixed_int16_t local_i16; + fixed_int32_t local_i32; + fixed_int64_t local_i64; + fixed_uint8_t local_u8; + fixed_uint16_t local_u16; + fixed_uint32_t local_u32; + fixed_uint64_t local_u64; + fixed_float16_t local_f16; + fixed_float32_t local_f32; + fixed_float64_t local_f64; + fixed_bfloat16_t local_bf16; + fixed_bool_t local_bool; + + // Arrays + fixed_int8_t local_arr_i8[3]; + fixed_int16_t local_arr_i16[3]; + fixed_int32_t local_arr_i32[3]; + fixed_int64_t local_arr_i64[3]; + fixed_uint8_t local_arr_u8[3]; + fixed_uint16_t local_arr_u16[3]; + fixed_uint32_t local_arr_u32[3]; + fixed_uint64_t local_arr_u64[3]; + fixed_float16_t local_arr_f16[3]; + fixed_float32_t local_arr_f32[3]; + fixed_float64_t local_arr_f64[3]; + fixed_bfloat16_t local_arr_bf16[3]; + fixed_bool_t local_arr_bool[3]; +} + +//===----------------------------------------------------------------------===// +// Structs and unions +//===----------------------------------------------------------------------===// +// CHECK-128: %struct.struct_int8 = type { <16 x i8> } +// CHECK-128-NEXT: %struct.struct_int16 = type { <8 x i16> } +// 
CHECK-128-NEXT: %struct.struct_int32 = type { <4 x i32> } +// CHECK-128-NEXT: %struct.struct_int64 = type { <2 x i64> } +// CHECK-128-NEXT: %struct.struct_uint8 = type { <16 x i8> } +// CHECK-128-NEXT: %struct.struct_uint16 = type { <8 x i16> } +// CHECK-128-NEXT: %struct.struct_uint32 = type { <4 x i32> } +// CHECK-128-NEXT: %struct.struct_uint64 = type { <2 x i64> } +// CHECK-128-NEXT: %struct.struct_float16 = type { <8 x half> } +// CHECK-128-NEXT: %struct.struct_float32 = type { <4 x float> } +// CHECK-128-NEXT: %struct.struct_float64 = type { <2 x double> } +// CHECK-128-NEXT: %struct.struct_bfloat16 = type { <8 x bfloat> } +// CHECK-128-NEXT: %struct.struct_bool = type { <2 x i8> } + +// CHECK-256: %struct.struct_int8 = type { <32 x i8> } +// CHECK-256-NEXT: %struct.struct_int16 = type { <16 x i16> } +// CHECK-256-NEXT: %struct.struct_int32 = type { <8 x i32> } +// CHECK-256-NEXT: %struct.struct_int64 = type { <4 x i64> } +// CHECK-256-NEXT: %struct.struct_uint8 = type { <32 x i8> } +// CHECK-256-NEXT: %struct.struct_uint16 = type { <16 x i16> } +// CHECK-256-NEXT: %struct.struct_uint32 = type { <8 x i32> } +// CHECK-256-NEXT: %struct.struct_uint64 = type { <4 x i64> } +// CHECK-256-NEXT: %struct.struct_float16 = type { <16 x half> } +// CHECK-256-NEXT: %struct.struct_float32 = type { <8 x float> } +// CHECK-256-NEXT: %struct.struct_float64 = type { <4 x double> } +// CHECK-256-NEXT: %struct.struct_bfloat16 = type { <16 x bfloat> } +// CHECK-256-NEXT: %struct.struct_bool = type { <4 x i8> } + +// CHECK-512: %struct.struct_int8 = type { <64 x i8> } +// CHECK-512-NEXT: %struct.struct_int16 = type { <32 x i16> } +// CHECK-512-NEXT: %struct.struct_int32 = type { <16 x i32> } +// CHECK-512-NEXT: %struct.struct_int64 = type { <8 x i64> } +// CHECK-512-NEXT: %struct.struct_uint8 = type { <64 x i8> } +// CHECK-512-NEXT: %struct.struct_uint16 = type { <32 x i16> } +// CHECK-512-NEXT: %struct.struct_uint32 = type { <16 x i32> } +// CHECK-512-NEXT: %struct.struct_uint64 = type { <8 x i64> } +// CHECK-512-NEXT: %struct.struct_float16 = type { <32 x half> } +// CHECK-512-NEXT: %struct.struct_float32 = type { <16 x float> } +// CHECK-512-NEXT: %struct.struct_float64 = type { <8 x double> } +// CHECK-512-NEXT: %struct.struct_bfloat16 = type { <32 x bfloat> } +// CHECK-512-NEXT: %struct.struct_bool = type { <8 x i8> } + +// CHECK-1024: %struct.struct_int8 = type { <128 x i8> } +// CHECK-1024-NEXT: %struct.struct_int16 = type { <64 x i16> } +// CHECK-1024-NEXT: %struct.struct_int32 = type { <32 x i32> } +// CHECK-1024-NEXT: %struct.struct_int64 = type { <16 x i64> } +// CHECK-1024-NEXT: %struct.struct_uint8 = type { <128 x i8> } +// CHECK-1024-NEXT: %struct.struct_uint16 = type { <64 x i16> } +// CHECK-1024-NEXT: %struct.struct_uint32 = type { <32 x i32> } +// CHECK-1024-NEXT: %struct.struct_uint64 = type { <16 x i64> } +// CHECK-1024-NEXT: %struct.struct_float16 = type { <64 x half> } +// CHECK-1024-NEXT: %struct.struct_float32 = type { <32 x float> } +// CHECK-1024-NEXT: %struct.struct_float64 = type { <16 x double> } +// CHECK-1024-NEXT: %struct.struct_bfloat16 = type { <64 x bfloat> } +// CHECK-1024-NEXT: %struct.struct_bool = type { <16 x i8> } + +// CHECK-2048: %struct.struct_int8 = type { <256 x i8> } +// CHECK-2048-NEXT: %struct.struct_int16 = type { <128 x i16> } +// CHECK-2048-NEXT: %struct.struct_int32 = type { <64 x i32> } +// CHECK-2048-NEXT: %struct.struct_int64 = type { <32 x i64> } +// CHECK-2048-NEXT: %struct.struct_uint8 = type { <256 x i8> } +// CHECK-2048-NEXT: 
%struct.struct_uint16 = type { <128 x i16> } +// CHECK-2048-NEXT: %struct.struct_uint32 = type { <64 x i32> } +// CHECK-2048-NEXT: %struct.struct_uint64 = type { <32 x i64> } +// CHECK-2048-NEXT: %struct.struct_float16 = type { <128 x half> } +// CHECK-2048-NEXT: %struct.struct_float32 = type { <64 x float> } +// CHECK-2048-NEXT: %struct.struct_float64 = type { <32 x double> } +// CHECK-2048-NEXT: %struct.struct_bfloat16 = type { <128 x bfloat> } +// CHECK-2048-NEXT: %struct.struct_bool = type { <32 x i8> } + +// CHECK-128: %union.union_int8 = type { <16 x i8> } +// CHECK-128-NEXT: %union.union_int16 = type { <8 x i16> } +// CHECK-128-NEXT: %union.union_int32 = type { <4 x i32> } +// CHECK-128-NEXT: %union.union_int64 = type { <2 x i64> } +// CHECK-128-NEXT: %union.union_uint8 = type { <16 x i8> } +// CHECK-128-NEXT: %union.union_uint16 = type { <8 x i16> } +// CHECK-128-NEXT: %union.union_uint32 = type { <4 x i32> } +// CHECK-128-NEXT: %union.union_uint64 = type { <2 x i64> } +// CHECK-128-NEXT: %union.union_float16 = type { <8 x half> } +// CHECK-128-NEXT: %union.union_float32 = type { <4 x float> } +// CHECK-128-NEXT: %union.union_float64 = type { <2 x double> } +// CHECK-128-NEXT: %union.union_bfloat16 = type { <8 x bfloat> } +// CHECK-128-NEXT: %union.union_bool = type { <2 x i8> } + +// CHECK-256: %union.union_int8 = type { <32 x i8> } +// CHECK-256-NEXT: %union.union_int16 = type { <16 x i16> } +// CHECK-256-NEXT: %union.union_int32 = type { <8 x i32> } +// CHECK-256-NEXT: %union.union_int64 = type { <4 x i64> } +// CHECK-256-NEXT: %union.union_uint8 = type { <32 x i8> } +// CHECK-256-NEXT: %union.union_uint16 = type { <16 x i16> } +// CHECK-256-NEXT: %union.union_uint32 = type { <8 x i32> } +// CHECK-256-NEXT: %union.union_uint64 = type { <4 x i64> } +// CHECK-256-NEXT: %union.union_float16 = type { <16 x half> } +// CHECK-256-NEXT: %union.union_float32 = type { <8 x float> } +// CHECK-256-NEXT: %union.union_float64 = type { <4 x double> } +// CHECK-256-NEXT: %union.union_bfloat16 = type { <16 x bfloat> } +// CHECK-256-NEXT: %union.union_bool = type { <4 x i8> } + +// CHECK-512: %union.union_int8 = type { <64 x i8> } +// CHECK-512-NEXT: %union.union_int16 = type { <32 x i16> } +// CHECK-512-NEXT: %union.union_int32 = type { <16 x i32> } +// CHECK-512-NEXT: %union.union_int64 = type { <8 x i64> } +// CHECK-512-NEXT: %union.union_uint8 = type { <64 x i8> } +// CHECK-512-NEXT: %union.union_uint16 = type { <32 x i16> } +// CHECK-512-NEXT: %union.union_uint32 = type { <16 x i32> } +// CHECK-512-NEXT: %union.union_uint64 = type { <8 x i64> } +// CHECK-512-NEXT: %union.union_float16 = type { <32 x half> } +// CHECK-512-NEXT: %union.union_float32 = type { <16 x float> } +// CHECK-512-NEXT: %union.union_float64 = type { <8 x double> } +// CHECK-512-NEXT: %union.union_bfloat16 = type { <32 x bfloat> } +// CHECK-512-NEXT: %union.union_bool = type { <8 x i8> } + +// CHECK-1024: %union.union_int8 = type { <128 x i8> } +// CHECK-1024-NEXT: %union.union_int16 = type { <64 x i16> } +// CHECK-1024-NEXT: %union.union_int32 = type { <32 x i32> } +// CHECK-1024-NEXT: %union.union_int64 = type { <16 x i64> } +// CHECK-1024-NEXT: %union.union_uint8 = type { <128 x i8> } +// CHECK-1024-NEXT: %union.union_uint16 = type { <64 x i16> } +// CHECK-1024-NEXT: %union.union_uint32 = type { <32 x i32> } +// CHECK-1024-NEXT: %union.union_uint64 = type { <16 x i64> } +// CHECK-1024-NEXT: %union.union_float16 = type { <64 x half> } +// CHECK-1024-NEXT: %union.union_float32 = type { <32 x float> } +// 
CHECK-1024-NEXT: %union.union_float64 = type { <16 x double> } +// CHECK-1024-NEXT: %union.union_bfloat16 = type { <64 x bfloat> } +// CHECK-1024-NEXT: %union.union_bool = type { <16 x i8> } + +// CHECK-2048: %union.union_int8 = type { <256 x i8> } +// CHECK-2048-NEXT: %union.union_int16 = type { <128 x i16> } +// CHECK-2048-NEXT: %union.union_int32 = type { <64 x i32> } +// CHECK-2048-NEXT: %union.union_int64 = type { <32 x i64> } +// CHECK-2048-NEXT: %union.union_uint8 = type { <256 x i8> } +// CHECK-2048-NEXT: %union.union_uint16 = type { <128 x i16> } +// CHECK-2048-NEXT: %union.union_uint32 = type { <64 x i32> } +// CHECK-2048-NEXT: %union.union_uint64 = type { <32 x i64> } +// CHECK-2048-NEXT: %union.union_float16 = type { <128 x half> } +// CHECK-2048-NEXT: %union.union_float32 = type { <64 x float> } +// CHECK-2048-NEXT: %union.union_float64 = type { <32 x double> } +// CHECK-2048-NEXT: %union.union_bfloat16 = type { <128 x bfloat> } +// CHECK-2048-NEXT: %union.union_bool = type { <32 x i8> } + +//===----------------------------------------------------------------------===// +// Global variables +//===----------------------------------------------------------------------===// +// CHECK-128: @global_i8 = global <16 x i8> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_i16 = global <8 x i16> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_i32 = global <4 x i32> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_i64 = global <2 x i64> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_u8 = global <16 x i8> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_u16 = global <8 x i16> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_u32 = global <4 x i32> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_u64 = global <2 x i64> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_f16 = global <8 x half> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_f32 = global <4 x float> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_f64 = global <2 x double> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_bf16 = global <8 x bfloat> zeroinitializer, align 16 +// CHECK-128-NEXT: @global_bool = global <2 x i8> zeroinitializer, align 2 + +// CHECK-256: @global_i8 = global <32 x i8> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_i16 = global <16 x i16> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_i32 = global <8 x i32> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_i64 = global <4 x i64> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_u8 = global <32 x i8> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_u16 = global <16 x i16> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_u32 = global <8 x i32> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_u64 = global <4 x i64> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_f16 = global <16 x half> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_f32 = global <8 x float> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_f64 = global <4 x double> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_bf16 = global <16 x bfloat> zeroinitializer, align 16 +// CHECK-NEXT-256: @global_bool = global <4 x i8> zeroinitializer, align 2 + +// CHECK-512: @global_i8 = global <64 x i8> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_i16 = global <32 x i16> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_i32 = global <16 x i32> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_i64 = global <8 x i64> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_u8 = 
global <64 x i8> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_u16 = global <32 x i16> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_u32 = global <16 x i32> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_u64 = global <8 x i64> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_f16 = global <32 x half> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_f32 = global <16 x float> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_f64 = global <8 x double> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_bf16 = global <32 x bfloat> zeroinitializer, align 16 +// CHECK-NEXT-512: @global_bool = global <8 x i8> zeroinitializer, align 2 + +// CHECK-1024: @global_i8 = global <128 x i8> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_i16 = global <64 x i16> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_i32 = global <32 x i32> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_i64 = global <16 x i64> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_u8 = global <128 x i8> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_u16 = global <64 x i16> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_u32 = global <32 x i32> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_u64 = global <16 x i64> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_f16 = global <64 x half> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_f32 = global <32 x float> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_f64 = global <16 x double> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_bf16 = global <64 x bfloat> zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_bool = global <16 x i8> zeroinitializer, align 2 + +// CHECK-2048: @global_i8 = global <256 x i8> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_i16 = global <128 x i16> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_i32 = global <64 x i32> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_i64 = global <32 x i64> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_u8 = global <256 x i8> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_u16 = global <128 x i16> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_u32 = global <64 x i32> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_u64 = global <32 x i64> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_f16 = global <128 x half> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_f32 = global <64 x float> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_f64 = global <32 x double> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_bf16 = global <128 x bfloat> zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_bool = global <32 x i8> zeroinitializer, align 2 + +//===----------------------------------------------------------------------===// +// Global arrays +//===----------------------------------------------------------------------===// +// CHECK-128: @global_arr_i8 = global [3 x <16 x i8>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_i16 = global [3 x <8 x i16>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_i32 = global [3 x <4 x i32>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_i64 = global [3 x <2 x i64>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_u8 = global [3 x <16 x i8>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_u16 = global [3 x <8 x i16>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_u32 = global [3 x <4 x i32>] zeroinitializer, align 16 +// CHECK-128-NEXT: 
@global_arr_u64 = global [3 x <2 x i64>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_f16 = global [3 x <8 x half>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_f32 = global [3 x <4 x float>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_f64 = global [3 x <2 x double>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_bf16 = global [3 x <8 x bfloat>] zeroinitializer, align 16 +// CHECK-128-NEXT: @global_arr_bool = global [3 x <2 x i8>] zeroinitializer, align 2 + +// CHECK-256: @global_arr_i8 = global [3 x <32 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_i16 = global [3 x <16 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_i32 = global [3 x <8 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_i64 = global [3 x <4 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_u8 = global [3 x <32 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_u16 = global [3 x <16 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_u32 = global [3 x <8 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_u64 = global [3 x <4 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_f16 = global [3 x <16 x half>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_f32 = global [3 x <8 x float>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_f64 = global [3 x <4 x double>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_bf16 = global [3 x <16 x bfloat>] zeroinitializer, align 16 +// CHECK-NEXT-256: @global_arr_bool = global [3 x <4 x i8>] zeroinitializer, align 2 + +// CHECK-512: @global_arr_i8 = global [3 x <64 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_i16 = global [3 x <32 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_i32 = global [3 x <16 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_i64 = global [3 x <8 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_u8 = global [3 x <64 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_u16 = global [3 x <32 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_u32 = global [3 x <16 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_u64 = global [3 x <8 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_f16 = global [3 x <32 x half>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_f32 = global [3 x <16 x float>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_f64 = global [3 x <8 x double>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_bf16 = global [3 x <32 x bfloat>] zeroinitializer, align 16 +// CHECK-NEXT-512: @global_arr_bool = global [3 x <8 x i8>] zeroinitializer, align 2 + +// CHECK-1024: @global_arr_i8 = global [3 x <128 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_i16 = global [3 x <64 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_i32 = global [3 x <32 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_i64 = global [3 x <16 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_u8 = global [3 x <128 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_u16 = global [3 x <64 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_u32 = global [3 x <32 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_u64 = global [3 x <16 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_f16 = global [3 x <64 
x half>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_f32 = global [3 x <32 x float>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_f64 = global [3 x <16 x double>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_bf16 = global [3 x <64 x bfloat>] zeroinitializer, align 16 +// CHECK-NEXT-1024: @global_arr_bool = global [3 x <16 x i8>] zeroinitializer, align 2 + +// CHECK-2048: @global_arr_i8 = global [3 x <256 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_i16 = global [3 x <128 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_i32 = global [3 x <64 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_i64 = global [3 x <32 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_u8 = global [3 x <256 x i8>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_u16 = global [3 x <128 x i16>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_u32 = global [3 x <64 x i32>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_u64 = global [3 x <32 x i64>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_f16 = global [3 x <128 x half>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_f32 = global [3 x <64 x float>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_f64 = global [3 x <32 x double>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_bf16 = global [3 x <128 x bfloat>] zeroinitializer, align 16 +// CHECK-NEXT-2048: @global_arr_bool = global [3 x <32 x i8>] zeroinitializer, align 2 + +//===----------------------------------------------------------------------===// +// Local variables +//===----------------------------------------------------------------------===// +// CHECK-128: %local_i8 = alloca <16 x i8>, align 16 +// CHECK-128-NEXT: %local_i16 = alloca <8 x i16>, align 16 +// CHECK-128-NEXT: %local_i32 = alloca <4 x i32>, align 16 +// CHECK-128-NEXT: %local_i64 = alloca <2 x i64>, align 16 +// CHECK-128-NEXT: %local_u8 = alloca <16 x i8>, align 16 +// CHECK-128-NEXT: %local_u16 = alloca <8 x i16>, align 16 +// CHECK-128-NEXT: %local_u32 = alloca <4 x i32>, align 16 +// CHECK-128-NEXT: %local_u64 = alloca <2 x i64>, align 16 +// CHECK-128-NEXT: %local_f16 = alloca <8 x half>, align 16 +// CHECK-128-NEXT: %local_f32 = alloca <4 x float>, align 16 +// CHECK-128-NEXT: %local_f64 = alloca <2 x double>, align 16 +// CHECK-128-NEXT: %local_bf16 = alloca <8 x bfloat>, align 16 +// CHECK-128-NEXT: %local_bool = alloca <2 x i8>, align 2 + +// CHECK-256: %local_i8 = alloca <32 x i8>, align 16 +// CHECK-256-NEXT: %local_i16 = alloca <16 x i16>, align 16 +// CHECK-256-NEXT: %local_i32 = alloca <8 x i32>, align 16 +// CHECK-256-NEXT: %local_i64 = alloca <4 x i64>, align 16 +// CHECK-256-NEXT: %local_u8 = alloca <32 x i8>, align 16 +// CHECK-256-NEXT: %local_u16 = alloca <16 x i16>, align 16 +// CHECK-256-NEXT: %local_u32 = alloca <8 x i32>, align 16 +// CHECK-256-NEXT: %local_u64 = alloca <4 x i64>, align 16 +// CHECK-256-NEXT: %local_f16 = alloca <16 x half>, align 16 +// CHECK-256-NEXT: %local_f32 = alloca <8 x float>, align 16 +// CHECK-256-NEXT: %local_f64 = alloca <4 x double>, align 16 +// CHECK-256-NEXT: %local_bf16 = alloca <16 x bfloat>, align 16 +// CHECK-256-NEXT: %local_bool = alloca <4 x i8>, align 2 + +// CHECK-512: %local_i8 = alloca <64 x i8>, align 16 +// CHECK-512-NEXT: %local_i16 = alloca <32 x i16>, align 16 +// CHECK-512-NEXT: %local_i32 = alloca <16 x i32>, align 16 +// CHECK-512-NEXT: %local_i64 = alloca <8 x i64>, align 
16 +// CHECK-512-NEXT: %local_u8 = alloca <64 x i8>, align 16 +// CHECK-512-NEXT: %local_u16 = alloca <32 x i16>, align 16 +// CHECK-512-NEXT: %local_u32 = alloca <16 x i32>, align 16 +// CHECK-512-NEXT: %local_u64 = alloca <8 x i64>, align 16 +// CHECK-512-NEXT: %local_f16 = alloca <32 x half>, align 16 +// CHECK-512-NEXT: %local_f32 = alloca <16 x float>, align 16 +// CHECK-512-NEXT: %local_f64 = alloca <8 x double>, align 16 +// CHECK-512-NEXT: %local_bf16 = alloca <32 x bfloat>, align 16 +// CHECK-512-NEXT: %local_bool = alloca <8 x i8>, align 2 + +// CHECK-1024: %local_i8 = alloca <128 x i8>, align 16 +// CHECK-1024-NEXT: %local_i16 = alloca <64 x i16>, align 16 +// CHECK-1024-NEXT: %local_i32 = alloca <32 x i32>, align 16 +// CHECK-1024-NEXT: %local_i64 = alloca <16 x i64>, align 16 +// CHECK-1024-NEXT: %local_u8 = alloca <128 x i8>, align 16 +// CHECK-1024-NEXT: %local_u16 = alloca <64 x i16>, align 16 +// CHECK-1024-NEXT: %local_u32 = alloca <32 x i32>, align 16 +// CHECK-1024-NEXT: %local_u64 = alloca <16 x i64>, align 16 +// CHECK-1024-NEXT: %local_f16 = alloca <64 x half>, align 16 +// CHECK-1024-NEXT: %local_f32 = alloca <32 x float>, align 16 +// CHECK-1024-NEXT: %local_f64 = alloca <16 x double>, align 16 +// CHECK-1024-NEXT: %local_bf16 = alloca <64 x bfloat>, align 16 +// CHECK-1024-NEXT: %local_bool = alloca <16 x i8>, align 2 + +// CHECK-2048: %local_i8 = alloca <256 x i8>, align 16 +// CHECK-2048-NEXT: %local_i16 = alloca <128 x i16>, align 16 +// CHECK-2048-NEXT: %local_i32 = alloca <64 x i32>, align 16 +// CHECK-2048-NEXT: %local_i64 = alloca <32 x i64>, align 16 +// CHECK-2048-NEXT: %local_u8 = alloca <256 x i8>, align 16 +// CHECK-2048-NEXT: %local_u16 = alloca <128 x i16>, align 16 +// CHECK-2048-NEXT: %local_u32 = alloca <64 x i32>, align 16 +// CHECK-2048-NEXT: %local_u64 = alloca <32 x i64>, align 16 +// CHECK-2048-NEXT: %local_f16 = alloca <128 x half>, align 16 +// CHECK-2048-NEXT: %local_f32 = alloca <64 x float>, align 16 +// CHECK-2048-NEXT: %local_f64 = alloca <32 x double>, align 16 +// CHECK-2048-NEXT: %local_bf16 = alloca <128 x bfloat>, align 16 +// CHECK-2048-NEXT: %local_bool = alloca <32 x i8>, align 2 + +//===----------------------------------------------------------------------===// +// Local arrays +//===----------------------------------------------------------------------===// +// CHECK-128: %local_arr_i8 = alloca [3 x <16 x i8>], align 16 +// CHECK-128-NEXT: %local_arr_i16 = alloca [3 x <8 x i16>], align 16 +// CHECK-128-NEXT: %local_arr_i32 = alloca [3 x <4 x i32>], align 16 +// CHECK-128-NEXT: %local_arr_i64 = alloca [3 x <2 x i64>], align 16 +// CHECK-128-NEXT: %local_arr_u8 = alloca [3 x <16 x i8>], align 16 +// CHECK-128-NEXT: %local_arr_u16 = alloca [3 x <8 x i16>], align 16 +// CHECK-128-NEXT: %local_arr_u32 = alloca [3 x <4 x i32>], align 16 +// CHECK-128-NEXT: %local_arr_u64 = alloca [3 x <2 x i64>], align 16 +// CHECK-128-NEXT: %local_arr_f16 = alloca [3 x <8 x half>], align 16 +// CHECK-128-NEXT: %local_arr_f32 = alloca [3 x <4 x float>], align 16 +// CHECK-128-NEXT: %local_arr_f64 = alloca [3 x <2 x double>], align 16 +// CHECK-128-NEXT: %local_arr_bf16 = alloca [3 x <8 x bfloat>], align 16 +// CHECK-128-NEXT: %local_arr_bool = alloca [3 x <2 x i8>], align 2 + +// CHECK-256: %local_arr_i8 = alloca [3 x <32 x i8>], align 16 +// CHECK-256-NEXT: %local_arr_i16 = alloca [3 x <16 x i16>], align 16 +// CHECK-256-NEXT: %local_arr_i32 = alloca [3 x <8 x i32>], align 16 +// CHECK-256-NEXT: %local_arr_i64 = alloca [3 x <4 x i64>], align 16 
+// CHECK-256-NEXT: %local_arr_u8 = alloca [3 x <32 x i8>], align 16 +// CHECK-256-NEXT: %local_arr_u16 = alloca [3 x <16 x i16>], align 16 +// CHECK-256-NEXT: %local_arr_u32 = alloca [3 x <8 x i32>], align 16 +// CHECK-256-NEXT: %local_arr_u64 = alloca [3 x <4 x i64>], align 16 +// CHECK-256-NEXT: %local_arr_f16 = alloca [3 x <16 x half>], align 16 +// CHECK-256-NEXT: %local_arr_f32 = alloca [3 x <8 x float>], align 16 +// CHECK-256-NEXT: %local_arr_f64 = alloca [3 x <4 x double>], align 16 +// CHECK-256-NEXT: %local_arr_bf16 = alloca [3 x <16 x bfloat>], align 16 +// CHECK-256-NEXT: %local_arr_bool = alloca [3 x <4 x i8>], align 2 + +// CHECK-512: %local_arr_i8 = alloca [3 x <64 x i8>], align 16 +// CHECK-512-NEXT: %local_arr_i16 = alloca [3 x <32 x i16>], align 16 +// CHECK-512-NEXT: %local_arr_i32 = alloca [3 x <16 x i32>], align 16 +// CHECK-512-NEXT: %local_arr_i64 = alloca [3 x <8 x i64>], align 16 +// CHECK-512-NEXT: %local_arr_u8 = alloca [3 x <64 x i8>], align 16 +// CHECK-512-NEXT: %local_arr_u16 = alloca [3 x <32 x i16>], align 16 +// CHECK-512-NEXT: %local_arr_u32 = alloca [3 x <16 x i32>], align 16 +// CHECK-512-NEXT: %local_arr_u64 = alloca [3 x <8 x i64>], align 16 +// CHECK-512-NEXT: %local_arr_f16 = alloca [3 x <32 x half>], align 16 +// CHECK-512-NEXT: %local_arr_f32 = alloca [3 x <16 x float>], align 16 +// CHECK-512-NEXT: %local_arr_f64 = alloca [3 x <8 x double>], align 16 +// CHECK-512-NEXT: %local_arr_bf16 = alloca [3 x <32 x bfloat>], align 16 +// CHECK-512-NEXT: %local_arr_bool = alloca [3 x <8 x i8>], align 2 + +// CHECK-1024: %local_arr_i8 = alloca [3 x <128 x i8>], align 16 +// CHECK-1024-NEXT: %local_arr_i16 = alloca [3 x <64 x i16>], align 16 +// CHECK-1024-NEXT: %local_arr_i32 = alloca [3 x <32 x i32>], align 16 +// CHECK-1024-NEXT: %local_arr_i64 = alloca [3 x <16 x i64>], align 16 +// CHECK-1024-NEXT: %local_arr_u8 = alloca [3 x <128 x i8>], align 16 +// CHECK-1024-NEXT: %local_arr_u16 = alloca [3 x <64 x i16>], align 16 +// CHECK-1024-NEXT: %local_arr_u32 = alloca [3 x <32 x i32>], align 16 +// CHECK-1024-NEXT: %local_arr_u64 = alloca [3 x <16 x i64>], align 16 +// CHECK-1024-NEXT: %local_arr_f16 = alloca [3 x <64 x half>], align 16 +// CHECK-1024-NEXT: %local_arr_f32 = alloca [3 x <32 x float>], align 16 +// CHECK-1024-NEXT: %local_arr_f64 = alloca [3 x <16 x double>], align 16 +// CHECK-1024-NEXT: %local_arr_bf16 = alloca [3 x <64 x bfloat>], align 16 +// CHECK-1024-NEXT: %local_arr_bool = alloca [3 x <16 x i8>], align 2 + +// CHECK-2048: %local_arr_i8 = alloca [3 x <256 x i8>], align 16 +// CHECK-2048-NEXT: %local_arr_i16 = alloca [3 x <128 x i16>], align 16 +// CHECK-2048-NEXT: %local_arr_i32 = alloca [3 x <64 x i32>], align 16 +// CHECK-2048-NEXT: %local_arr_i64 = alloca [3 x <32 x i64>], align 16 +// CHECK-2048-NEXT: %local_arr_u8 = alloca [3 x <256 x i8>], align 16 +// CHECK-2048-NEXT: %local_arr_u16 = alloca [3 x <128 x i16>], align 16 +// CHECK-2048-NEXT: %local_arr_u32 = alloca [3 x <64 x i32>], align 16 +// CHECK-2048-NEXT: %local_arr_u64 = alloca [3 x <32 x i64>], align 16 +// CHECK-2048-NEXT: %local_arr_f16 = alloca [3 x <128 x half>], align 16 +// CHECK-2048-NEXT: %local_arr_f32 = alloca [3 x <64 x float>], align 16 +// CHECK-2048-NEXT: %local_arr_f64 = alloca [3 x <32 x double>], align 16 +// CHECK-2048-NEXT: %local_arr_bf16 = alloca [3 x <128 x bfloat>], align 16 +// CHECK-2048-NEXT: %local_arr_bool = alloca [3 x <32 x i8>], align 2 Index: clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp 
Index: clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
===================================================================
--- /dev/null
+++ clang/test/CodeGenCXX/aarch64-sve-fixedtypeinfo.cpp
@@ -0,0 +1,75 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +sve -target-feature +bf16 \
+// RUN:   -D__ARM_FEATURE_SVE -msve-vector-bits=128 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +sve -target-feature +bf16 \
+// RUN:   -D__ARM_FEATURE_SVE -msve-vector-bits=256 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +sve -target-feature +bf16 \
+// RUN:   -D__ARM_FEATURE_SVE -msve-vector-bits=512 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +sve -target-feature +bf16 \
+// RUN:   -D__ARM_FEATURE_SVE -msve-vector-bits=1024 | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +sve -target-feature +bf16 \
+// RUN:   -D__ARM_FEATURE_SVE -msve-vector-bits=2048 | FileCheck %s
+
+// This test verifies fixed-length vectors defined with the
+// 'arm_sve_vector_bits' attribute map to the same AAPCS64 ABI type as the
+// sizeless variants.
+
+#define N __ARM_FEATURE_SVE_BITS_EXPERIMENTAL
+
+namespace std {
+class type_info;
+};
+
+typedef __SVInt8_t fixed_int8_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVInt16_t fixed_int16_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVInt32_t fixed_int32_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVInt64_t fixed_int64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef __SVUint8_t fixed_uint8_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVUint16_t fixed_uint16_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVUint32_t fixed_uint32_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVUint64_t fixed_uint64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef __SVFloat16_t fixed_float16_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVFloat32_t fixed_float32_t __attribute__((arm_sve_vector_bits(N)));
+typedef __SVFloat64_t fixed_float64_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef __SVBFloat16_t fixed_bfloat16_t __attribute__((arm_sve_vector_bits(N)));
+
+typedef __SVBool_t fixed_bool_t __attribute__((arm_sve_vector_bits(N)));
+
+auto &fs8 = typeid(fixed_int8_t);
+auto &fs16 = typeid(fixed_int16_t);
+auto &fs32 = typeid(fixed_int32_t);
+auto &fs64 = typeid(fixed_int64_t);
+
+auto &fu8 = typeid(fixed_uint8_t);
+auto &fu16 = typeid(fixed_uint16_t);
+auto &fu32 = typeid(fixed_uint32_t);
+auto &fu64 = typeid(fixed_uint64_t);
+
+auto &ff16 = typeid(fixed_float16_t);
+auto &ff32 = typeid(fixed_float32_t);
+auto &ff64 = typeid(fixed_float64_t);
+
+auto &fbf16 = typeid(fixed_bfloat16_t);
+
+auto &fb8 = typeid(fixed_bool_t);
+
+// CHECK-DAG: @_ZTIu10__SVInt8_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu10__SVInt8_t
+// CHECK-DAG: @_ZTIu11__SVInt16_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu11__SVInt16_t
+// CHECK-DAG: @_ZTIu11__SVInt32_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu11__SVInt32_t
+// CHECK-DAG: @_ZTIu11__SVInt64_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu11__SVInt64_t
+// CHECK-DAG: @_ZTIu11__SVUint8_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu11__SVUint8_t
+// CHECK-DAG: @_ZTIu12__SVUint16_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu12__SVUint16_t
+// CHECK-DAG: @_ZTIu12__SVUint32_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu12__SVUint32_t
+// CHECK-DAG: @_ZTIu12__SVUint64_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu12__SVUint64_t
+// CHECK-DAG: @_ZTIu13__SVFloat16_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu13__SVFloat16_t
+// CHECK-DAG: @_ZTIu13__SVFloat32_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu13__SVFloat32_t
+// CHECK-DAG: @_ZTIu13__SVFloat64_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu13__SVFloat64_t
+// CHECK-DAG: @_ZTIu14__SVBfloat16_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu14__SVBfloat16_t
+// CHECK-DAG: @_ZTIu10__SVBool_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu10__SVBool_t
Index: clang/test/Sema/attr-arm-sve-vector-bits.c
===================================================================
--- clang/test/Sema/attr-arm-sve-vector-bits.c
+++ clang/test/Sema/attr-arm-sve-vector-bits.c
@@ -102,8 +102,11 @@
   svint8_t ss8;
   void *sel __attribute__((unused));
 
-  sel = c ? ss8 : fs8; // expected-error {{incompatible operand types ('svint8_t' (aka '__SVInt8_t') and 'fixed_int8_t' (aka '__SVInt8_t'))}}
-  sel = c ? fs8 : ss8; // expected-error {{incompatible operand types ('fixed_int8_t' (aka '__SVInt8_t') and 'svint8_t' (aka '__SVInt8_t'))}}
+  sel = c ? ss8 : fs8; // expected-error {{cannot convert between fixed-length and sizeless vector}}
+  sel = c ? fs8 : ss8; // expected-error {{cannot convert between fixed-length and sizeless vector}}
+
+  sel = fs8 + ss8; // expected-error {{cannot convert between fixed-length and sizeless vector}}
+  sel = ss8 + fs8; // expected-error {{cannot convert between fixed-length and sizeless vector}}
 }
 
 // --------------------------------------------------------------------------//
@@ -192,14 +195,17 @@
 TEST_CAST(bool)
 
 // Test the implicit conversion only applies to valid types
-fixed_int8_t to_fixed_int8_t__from_svuint8_t(svuint8_t x) { return x; } // expected-error {{returning 'svuint8_t' (aka '__SVUint8_t') from a function with incompatible result type 'fixed_int8_t' (aka '__SVInt8_t')}}
-fixed_bool_t to_fixed_bool_t__from_svint32_t(svint32_t x) { return x; } // expected-error {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'fixed_bool_t' (aka '__SVBool_t')}}
+fixed_int8_t to_fixed_int8_t__from_svuint8_t(svuint8_t x) { return x; } // expected-error-re {{returning 'svuint8_t' (aka '__SVUint8_t') from a function with incompatible result type 'fixed_int8_t' (vector of {{[0-9]+}} 'signed char' values)}}
+fixed_bool_t to_fixed_bool_t__from_svint32_t(svint32_t x) { return x; } // expected-error-re {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'fixed_bool_t' (vector of {{[0-9]+}} 'unsigned char' values)}}
+// Test conversion between predicate and int8 is invalid, both have the same
+// memory representation.
+fixed_bool_t to_fixed_bool_t__from_svint8_t(svint8_t x) { return x; } // expected-error-re {{returning 'svint8_t' (aka '__SVInt8_t') from a function with incompatible result type 'fixed_bool_t' (vector of {{[0-9]+}} 'unsigned char' values)}}
 
 // Test the implicit conversion only applies to fixed-length types
 typedef signed int vSInt32 __attribute__((__vector_size__(16)));
-svint32_t to_svint32_t_from_gnut(vSInt32 x) { return x; } // expected-error {{returning 'vSInt32' (vector of 4 'int' values) from a function with incompatible result type 'svint32_t' (aka '__SVInt32_t')}}
+svint32_t to_svint32_t_from_gnut(vSInt32 x) { return x; } // expected-error-re {{returning 'vSInt32' (vector of {{[0-9]+}} 'int' values) from a function with incompatible result type 'svint32_t' (aka '__SVInt32_t')}}
 
-vSInt32 to_gnut_from_svint32_t(svint32_t x) { return x; } // expected-error {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'vSInt32' (vector of 4 'int' values)}}
+vSInt32 to_gnut_from_svint32_t(svint32_t x) { return x; } // expected-error-re {{returning 'svint32_t' (aka '__SVInt32_t') from a function with incompatible result type 'vSInt32' (vector of {{[0-9]+}} 'int' values)}}
 
 // --------------------------------------------------------------------------//
 // Test the scalable and fixed-length types can be used interchangeably
Index: llvm/lib/Analysis/InlineCost.cpp
===================================================================
--- llvm/lib/Analysis/InlineCost.cpp
+++ llvm/lib/Analysis/InlineCost.cpp
@@ -852,6 +852,10 @@
 }
 
 bool CallAnalyzer::visitAlloca(AllocaInst &I) {
+  // FIXME: Support scalable vector types.
+  if (isa<ScalableVectorType>(I.getAllocatedType()))
+    return false;
+
   // Check whether inlining will turn a dynamic alloca into a static
   // alloca and handle that case.
   if (I.isArrayAllocation()) {
Index: llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
===================================================================
--- llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -94,6 +94,10 @@
   Type *CastElTy = PTy->getElementType();
   if (!AllocElTy->isSized() || !CastElTy->isSized()) return nullptr;
 
+  // FIXME: Support scalable vector types.
+  if (isa<ScalableVectorType>(AllocElTy) || isa<ScalableVectorType>(CastElTy))
+    return nullptr;
+
   Align AllocElTyAlign = DL.getABITypeAlign(AllocElTy);
   Align CastElTyAlign = DL.getABITypeAlign(CastElTy);
   if (CastElTyAlign < AllocElTyAlign) return nullptr;
Index: llvm/lib/Transforms/Scalar/SROA.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/SROA.cpp
+++ llvm/lib/Transforms/Scalar/SROA.cpp
@@ -780,6 +780,9 @@
         LI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&LI);
 
+    if (isa<ScalableVectorType>(LI.getType()))
+      return PI.setAborted(&LI);
+
     uint64_t Size = DL.getTypeStoreSize(LI.getType()).getFixedSize();
     return handleLoadOrStore(LI.getType(), LI, Offset, Size, LI.isVolatile());
   }
@@ -795,6 +798,9 @@
         SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
       return PI.setAborted(&SI);
 
+    if (isa<ScalableVectorType>(ValOp->getType()))
+      return PI.setAborted(&SI);
+
     uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();
 
     // If this memory access can be shown to *statically* extend outside the
@@ -1533,6 +1539,8 @@
     return nullptr;
   Type *ElementTy = Ty->getElementType();
+  if (isa<ScalableVectorType>(ElementTy))
+    return nullptr;
   if (!ElementTy->isSized())
     return nullptr; // We can't GEP through an unsized element.
   APInt ElementSize(Offset.getBitWidth(),