diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -389,6 +389,9 @@
 - Fixed incorrect ABI lowering of ``_Float16`` in the case of structs
   containing ``_Float16`` that are eligible for passing via GPR+FPR or
   FPR+FPR.
+- Added ``attribute(riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS))`` to allow
+  the size of an RVV (RISC-V Vector) scalable type to be specified. This allows
+  RVV scalable vector types to be used in structs or in global variables.
 
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2253,6 +2253,15 @@
   /// false otherwise.
   bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
 
+  /// Return true if the given types are a RISC-V vector builtin type and a
+  /// VectorType that is a fixed-length representation of the RISC-V vector
+  /// builtin type for a specific vector length.
+  bool areCompatibleRVVTypes(QualType FirstType, QualType SecondType);
+
+  /// Return true if the given vector types are lax-compatible RISC-V vector
+  /// types as defined by -flax-vector-conversions=, false otherwise.
+  bool areLaxCompatibleRVVTypes(QualType FirstType, QualType SecondType);
+
   /// Return true if the type has been explicitly qualified with ObjC ownership.
   /// A type may be implicitly qualified with ownership under ObjC ARC, and in
   /// some cases the compiler treats these differently.
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1767,7 +1767,7 @@
 
     /// The kind of vector, either a generic vector type or some
     /// target-specific vector type such as for AltiVec or Neon.
-    unsigned VecKind : 3;
+    unsigned VecKind : 4;
 
     /// The number of elements in the vector.
     uint32_t NumElements;
   };
@@ -2046,6 +2046,16 @@
   /// 'arm_sve_vector_bits' type attribute as VectorType.
   QualType getSveEltType(const ASTContext &Ctx) const;
 
+  /// Determines if this is a sizeless type supported by the
+  /// 'riscv_rvv_vector_bits' type attribute, which can be applied to a single
+  /// RVV vector or mask.
+  bool isRVVVLSBuiltinType() const;
+
+  /// Returns the representative type for the element of an RVV builtin type.
+  /// This is used to represent fixed-length RVV vectors created with the
+  /// 'riscv_rvv_vector_bits' type attribute as VectorType.
+  QualType getRVVEltType(const ASTContext &Ctx) const;
+
   /// Types are partitioned into 3 broad categories (C99 6.2.5p1):
   /// object types, function types, and incomplete types.
 
@@ -3399,7 +3409,10 @@
     SveFixedLengthDataVector,
 
     /// is AArch64 SVE fixed-length predicate vector
-    SveFixedLengthPredicateVector
+    SveFixedLengthPredicateVector,
+
+    /// is RISC-V RVV fixed-length data vector
+    RVVFixedLengthDataVector,
   };
 
 protected:
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1871,6 +1871,16 @@
   let Documentation = [RISCVInterruptDocs];
 }
 
+def RISCVRVVVectorBits : TypeAttr {
+  let Spellings = [GNU<"riscv_rvv_vector_bits">];
+  let Subjects = SubjectList<[TypedefName], ErrorDiag>;
+  let Args = [UnsignedArgument<"NumBits">];
+  let Documentation = [RISCVRVVVectorBitsDocs];
+  let PragmaAttributeSupport = 0;
+  // Represented as VectorType instead.
+  let ASTNode = 0;
+}
+
 // This is not a TargetSpecificAttr so that it is silently accepted and
 // ignored on other targets as encouraged by the OpenCL spec.
 //
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2317,6 +2317,37 @@
   }];
 }
 
+def RISCVRVVVectorBitsDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+The ``riscv_rvv_vector_bits(N)`` attribute is used to define fixed-length
+variants of sizeless types.
+
+For example:
+
+.. code-block:: c
+
+  #include <riscv_vector.h>
+
+  #if __RISCV_RVV_VLEN_BITS == 512
+  typedef vint8m1_t fixed_vint8m1_t __attribute__((riscv_rvv_vector_bits(512)));
+  #endif
+
+This creates a type ``fixed_vint8m1_t`` that is a fixed-length variant of
+``vint8m1_t`` that contains exactly 512 bits. Unlike ``vint8m1_t``, this type
+can be used in globals, structs, unions, and arrays, all of which are
+unsupported for sizeless types.
+
+The attribute can be attached to a single RVV vector (such as ``vint8m1_t``).
+The behavior of the attribute is undefined unless
+``N == __RISCV_RVV_VLEN_BITS``, the implementation-defined feature macro that
+is enabled under the ``-mrvv-vector-bits`` flag. ``__RISCV_RVV_VLEN_BITS`` can
+only be a power of 2 between 64 and 65536.
+
+Only ``*m1_t`` (LMUL=1) types are supported at this time.
+}];
+}
+
 def AVRInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "interrupt (AVR)";
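For illustration, a sketch of the user-facing semantics documented above (hypothetical example, not taken from the patch; assumes the translation unit is built so that __RISCV_RVV_VLEN_BITS expands to 128):

    #include <riscv_vector.h>

    // Fixed-length alias of the sizeless vint32m1_t. N must equal
    // __RISCV_RVV_VLEN_BITS, otherwise the behavior is undefined.
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));

    // All of the following are ill-formed for the sizeless type but are
    // accepted for the fixed-length variant:
    fixed_vint32m1_t global_vec;             // global variable
    struct S { fixed_vint32m1_t member; };   // struct member
    fixed_vint32m1_t arr[4];                 // array element
    _Static_assert(sizeof(fixed_vint32m1_t) == 16, "sizeof is well-defined");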
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3057,6 +3057,14 @@
   "value of 128, 256, 512, 1024 or 2048.">;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
+def err_attribute_riscv_rvv_bits_unsupported : Error<
+  "%0 is only supported when '-mrvv-vector-bits=' is specified with a "
+  "value of \"zvl\" or a power of 2 in the range [64,65536]">;
+def err_attribute_bad_rvv_vector_size : Error<
+  "invalid RVV vector size '%0', must match value set by "
+  "'-mrvv-vector-bits' ('%1')">;
+def err_attribute_invalid_rvv_type : Error<
+  "%0 attribute applied to non-RVV type %1">;
 def err_attribute_requires_positive_integer : Error<
   "%0 attribute requires a %select{positive|non-negative}1 "
   "integral compile time constant expression">;
@@ -3167,8 +3175,9 @@
   "vector size not an integral multiple of component size">;
 def err_attribute_zero_size : Error<"zero %0 size">;
 def err_attribute_size_too_large : Error<"%0 size too large">;
-def err_typecheck_sve_ambiguous : Error<
-  "cannot combine fixed-length and sizeless SVE vectors in expression, result is ambiguous (%0 and %1)">;
+def err_typecheck_sve_rvv_ambiguous : Error<
+  "cannot combine fixed-length and sizeless %select{SVE|RVV}0 vectors "
+  "in expression, result is ambiguous (%1 and %2)">;
 def err_typecheck_sve_rvv_gnu_ambiguous : Error<
   "cannot combine GNU and %select{SVE|RVV}0 vectors in expression, result is ambiguous (%1 and %2)">;
 def err_typecheck_vector_not_convertable_implict_truncation : Error<
diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -40,6 +40,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef RVV_TYPE
+#define RVV_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef RVV_VECTOR_TYPE
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP)\
   RVV_TYPE(Name, Id, SingletonId)
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -162,6 +162,9 @@
     /// Arm SVE Vector conversions
     ICK_SVE_Vector_Conversion,
 
+    /// RISC-V RVV Vector conversions
+    ICK_RVV_Vector_Conversion,
+
     /// A vector splat from an arithmetic type
     ICK_Vector_Splat,
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12685,6 +12685,7 @@
                               SourceLocation Loc, bool IsCompAssign);
 
   bool isValidSveBitcast(QualType srcType, QualType destType);
+  bool isValidRVVBitcast(QualType srcType, QualType destType);
 
   bool areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy);
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -85,6 +85,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 #include <cassert>
@@ -2010,6 +2011,9 @@
     else if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
       // Adjust the alignment for fixed-length SVE predicates.
      Align = 16;
+    else if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+      // Adjust the alignment for fixed-length RVV vectors.
+      Align = 64;
     break;
   }
 
@@ -9454,7 +9458,9 @@
       First->getVectorKind() != VectorType::SveFixedLengthDataVector &&
       First->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
       Second->getVectorKind() != VectorType::SveFixedLengthDataVector &&
-      Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector)
+      Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
+      First->getVectorKind() != VectorType::RVVFixedLengthDataVector &&
+      Second->getVectorKind() != VectorType::RVVFixedLengthDataVector)
     return true;
 
   return false;
@@ -9552,6 +9558,85 @@
          IsLaxCompatible(SecondType, FirstType);
 }
 
+/// getRVVTypeSize - Return RVV vector register size.
+static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
+  assert(Ty->isRVVVLSBuiltinType() && "Invalid RVV Type");
+  auto VScale = Context.getTargetInfo().getVScaleRange(Context.getLangOpts());
+  return VScale ? VScale->first * llvm::RISCV::RVVBitsPerBlock : 0;
+}
+
+bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
+                                       QualType SecondType) {
+  assert(
+      ((FirstType->isRVVSizelessBuiltinType() && SecondType->isVectorType()) ||
+       (FirstType->isVectorType() && SecondType->isRVVSizelessBuiltinType())) &&
+      "Expected RVV builtin type and vector type!");
+
+  auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
+    if (const auto *BT = FirstType->getAs<BuiltinType>()) {
+      if (const auto *VT = SecondType->getAs<VectorType>()) {
+        // Predicates have the same representation as uint8 so we also have to
+        // check the kind to make these types incompatible.
+        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+          return FirstType->isRVVVLSBuiltinType() &&
+                 VT->getElementType().getCanonicalType() ==
+                     FirstType->getRVVEltType(*this);
+        if (VT->getVectorKind() == VectorType::GenericVector)
+          return getTypeSize(SecondType) == getRVVTypeSize(*this, BT) &&
+                 hasSameType(VT->getElementType(),
+                             getBuiltinVectorTypeInfo(BT).ElementType);
+      }
+    }
+    return false;
+  };
+
+  return IsValidCast(FirstType, SecondType) ||
+         IsValidCast(SecondType, FirstType);
+}
+
+bool ASTContext::areLaxCompatibleRVVTypes(QualType FirstType,
+                                          QualType SecondType) {
+  assert(
+      ((FirstType->isRVVSizelessBuiltinType() && SecondType->isVectorType()) ||
+       (FirstType->isVectorType() && SecondType->isRVVSizelessBuiltinType())) &&
+      "Expected RVV builtin type and vector type!");
+
+  auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
+    const auto *BT = FirstType->getAs<BuiltinType>();
+    if (!BT)
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    if (VecTy &&
+        (VecTy->getVectorKind() == VectorType::RVVFixedLengthDataVector ||
+         VecTy->getVectorKind() == VectorType::GenericVector)) {
+      const LangOptions::LaxVectorConversionKind LVCKind =
+          getLangOpts().getLaxVectorConversions();
+
+      // If __RISCV_RVV_VLEN_BITS != N do not allow GNU vector lax conversion.
+      if (VecTy->getVectorKind() == VectorType::GenericVector &&
+          getTypeSize(SecondType) != getRVVTypeSize(*this, BT))
+        return false;
+
+      // If -flax-vector-conversions=all is specified, the types are
+      // certainly compatible.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::All)
+        return true;
+
+      // If -flax-vector-conversions=integer is specified, the types are
+      // compatible if the elements are integer types.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
+        return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
+               FirstType->getRVVEltType(*this)->isIntegerType();
+    }
+
+    return false;
+  };
+
+  return IsLaxCompatible(FirstType, SecondType) ||
+         IsLaxCompatible(SecondType, FirstType);
+}
+
 bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
   while (true) {
     // __strong id
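A sketch of the conversions these two predicates govern (hypothetical user code, not from the patch; assumes __RISCV_RVV_VLEN_BITS is defined and a GNU vector of matching total size):

    #include <riscv_vector.h>
    #include <stdint.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));
    typedef int32_t gnu_vint32m1_t
        __attribute__((vector_size(__RISCV_RVV_VLEN_BITS / 8)));

    void convert(vint32m1_t scalable, fixed_vint32m1_t fixed,
                 gnu_vint32m1_t gnu) {
      fixed = scalable;  // areCompatibleRVVTypes: exact match, always allowed
      scalable = fixed;  // likewise in the other direction
      scalable = gnu;    // allowed here because the GNU vector's total size and
                         // element type match; with a different element type
                         // this would instead go through areLaxCompatibleRVVTypes
                         // and depend on -flax-vector-conversions=
    }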
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -563,6 +563,8 @@
   void mangleAArch64NeonVectorType(const DependentVectorType *T);
   void mangleAArch64FixedSveVectorType(const VectorType *T);
   void mangleAArch64FixedSveVectorType(const DependentVectorType *T);
+  void mangleRISCVFixedRVVVectorType(const VectorType *T);
+  void mangleRISCVFixedRVVVectorType(const DependentVectorType *T);
 
   void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value);
   void mangleFloatLiteral(QualType T, const llvm::APFloat &V);
@@ -3812,6 +3814,68 @@
   Diags.Report(T->getAttributeLoc(), DiagID);
 }
 
+void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
+  assert(T->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
+         "expected fixed-length RVV vector!");
+
+  QualType EltType = T->getElementType();
+  assert(EltType->isBuiltinType() &&
+         "expected builtin type for fixed-length RVV vector!");
+
+  StringRef TypeName;
+  switch (cast<BuiltinType>(EltType)->getKind()) {
+  case BuiltinType::SChar:
+    TypeName = "__rvv_int8m1_t";
+    break;
+  case BuiltinType::UChar:
+    TypeName = "__rvv_uint8m1_t";
+    break;
+  case BuiltinType::Short:
+    TypeName = "__rvv_int16m1_t";
+    break;
+  case BuiltinType::UShort:
+    TypeName = "__rvv_uint16m1_t";
+    break;
+  case BuiltinType::Int:
+    TypeName = "__rvv_int32m1_t";
+    break;
+  case BuiltinType::UInt:
+    TypeName = "__rvv_uint32m1_t";
+    break;
+  case BuiltinType::Long:
+    TypeName = "__rvv_int64m1_t";
+    break;
+  case BuiltinType::ULong:
+    TypeName = "__rvv_uint64m1_t";
+    break;
+  case BuiltinType::Half:
+    TypeName = "__rvv_float16m1_t";
+    break;
+  case BuiltinType::Float:
+    TypeName = "__rvv_float32m1_t";
+    break;
+  case BuiltinType::Double:
+    TypeName = "__rvv_float64m1_t";
+    break;
+  default:
+    llvm_unreachable("unexpected element type for fixed-length RVV vector!");
+  }
+
+  unsigned VecSizeInBits = getASTContext().getTypeInfo(T).Width;
+
+  Out << "9__RVV_VLSI" << 'u' << TypeName.size() << TypeName << "Lj"
+      << VecSizeInBits << "EE";
+}
+
+void CXXNameMangler::mangleRISCVFixedRVVVectorType(
+    const DependentVectorType *T) {
+  DiagnosticsEngine &Diags = Context.getDiags();
+  unsigned DiagID = Diags.getCustomDiagID(
+      DiagnosticsEngine::Error,
+      "cannot mangle this dependent fixed-length RVV vector type yet");
+  Diags.Report(T->getAttributeLoc(), DiagID);
+}
+
 // GNU extension: vector types
 // <type>        ::= <vector-type>
 // <vector-type> ::= Dv <positive dimension number> _
@@ -3836,6 +3900,9 @@
       T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
     mangleAArch64FixedSveVectorType(T);
     return;
+  } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+    mangleRISCVFixedRVVVectorType(T);
+    return;
   }
   Out << "Dv" << T->getNumElements() << '_';
   if (T->getVectorKind() == VectorType::AltiVecPixel)
@@ -3862,6 +3929,9 @@
       T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
     mangleAArch64FixedSveVectorType(T);
     return;
+  } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+    mangleRISCVFixedRVVVectorType(T);
+    return;
   }
   Out << "Dv";
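For reference, a sketch of what this mangling produces: the fixed-length type is wrapped in the template-like __RVV_VLS name together with its bit width (hypothetical C++ example; assumes a 128-bit configuration, and the demangled spelling may vary by demangler):

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));
    void f(fixed_vint32m1_t) {}
    // mangles to: _Z1f9__RVV_VLSIu15__rvv_int32m1_tLj128EE
    // i.e. f(__RVV_VLS<__rvv_int32m1_t, 128u>)

Here "u15__rvv_int32m1_t" is the Itanium vendor-extended builtin type encoding (u, name length, name), matching the Out << 'u' << TypeName.size() << TypeName sequence above.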
"__rvv_int32m1_t"; + break; + case BuiltinType::UInt: + TypeName = "__rvv_uint32m1_t"; + break; + case BuiltinType::Long: + TypeName = "__rvv_int64m1_t"; + break; + case BuiltinType::ULong: + TypeName = "__rvv_uint64m1_t"; + break; + case BuiltinType::Half: + TypeName = "__rvv_float16m1_t"; + break; + case BuiltinType::Float: + TypeName = "__rvv_float32m1_t"; + break; + case BuiltinType::Double: + TypeName = "__rvv_float64m1_t"; + break; + default: + llvm_unreachable("unexpected element type for fixed-length RVV vector!"); + } + + unsigned VecSizeInBits = getASTContext().getTypeInfo(T).Width; + + Out << "9__RVV_VLSI" << 'u' << TypeName.size() << TypeName << "Lj" + << VecSizeInBits << "EE"; +} + +void CXXNameMangler::mangleRISCVFixedRVVVectorType( + const DependentVectorType *T) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "cannot mangle this dependent fixed-length RVV vector type yet"); + Diags.Report(T->getAttributeLoc(), DiagID); +} + // GNU extension: vector types // ::= // ::= Dv _ @@ -3836,6 +3900,9 @@ T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { mangleAArch64FixedSveVectorType(T); return; + } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) { + mangleRISCVFixedRVVVectorType(T); + return; } Out << "Dv" << T->getNumElements() << '_'; if (T->getVectorKind() == VectorType::AltiVecPixel) @@ -3862,6 +3929,9 @@ T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { mangleAArch64FixedSveVectorType(T); return; + } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) { + mangleRISCVFixedRVVVectorType(T); + return; } Out << "Dv"; diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -662,6 +662,9 @@ case VectorType::SveFixedLengthPredicateVector: JOS.attribute("vectorKind", "fixed-length sve predicate vector"); break; + case VectorType::RVVFixedLengthDataVector: + JOS.attribute("vectorKind", "fixed-length rvv data vector"); + break; } } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1495,6 +1495,9 @@ case VectorType::SveFixedLengthPredicateVector: OS << " fixed-length sve predicate vector"; break; + case VectorType::RVVFixedLengthDataVector: + OS << " fixed-length rvv data vector"; + break; } OS << " " << T->getNumElements(); } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/TargetParser/RISCVTargetParser.h" #include #include #include @@ -1928,6 +1929,11 @@ (VT->getKind() >= BuiltinType::SveInt8 && VT->getKind() <= BuiltinType::SveUint64); } + if (CanonicalType->isRVVVLSBuiltinType()) { + const auto *VT = cast(CanonicalType); + return (VT->getKind() >= BuiltinType::RvvInt8mf8 && + VT->getKind() <= BuiltinType::RvvUint64m8); + } return isIntegerType(); } @@ -2425,6 +2431,28 @@ return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType; } +bool Type::isRVVVLSBuiltinType() const { + if (const BuiltinType *BT = getAs()) { + switch (BT->getKind()) { + // FIXME: Support more than LMUL 1. 
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -687,6 +687,19 @@
     else
       OS << T->getNumElements();
 
     OS << " * sizeof(";
     print(T->getElementType(), OS, StringRef());
     // Multiply by 8 for the number of bits.
     OS << ") * 8))) ";
     printBefore(T->getElementType(), OS);
     break;
+  case VectorType::RVVFixedLengthDataVector:
+    // FIXME: We prefer to print the size directly here, but have no way
+    // to get the size of the type.
+    OS << "__attribute__((__riscv_rvv_vector_bits__(";
+
+    OS << T->getNumElements();
+
+    OS << " * sizeof(";
+    print(T->getElementType(), OS, StringRef());
+    // Multiply by 8 for the number of bits.
+    OS << ") * 8))) ";
+    printBefore(T->getElementType(), OS);
+    break;
@@ -759,6 +772,20 @@
     OS << "))) ";
     printBefore(T->getElementType(), OS);
     break;
+  case VectorType::RVVFixedLengthDataVector:
+    // FIXME: We prefer to print the size directly here, but have no way
+    // to get the size of the type.
+    OS << "__attribute__((__riscv_rvv_vector_bits__(";
+    if (T->getSizeExpr()) {
+      T->getSizeExpr()->printPretty(OS, nullptr, Policy);
+      OS << " * sizeof(";
+      print(T->getElementType(), OS, StringRef());
+      // Multiply by 8 for the number of bits.
+      OS << ") * 8";
+    }
+    OS << "))) ";
+    printBefore(T->getElementType(), OS);
+    break;
   }
 }
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -200,6 +200,11 @@
     // Currently we support the v0.11 RISC-V V intrinsics.
     Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(0, 11)));
   }
+
+  auto VScale = getVScaleRange(Opts);
+  if (VScale && VScale->first && VScale->first == VScale->second)
+    Builder.defineMacro("__RISCV_RVV_VLEN_BITS",
+                        Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
 }
 
 static constexpr Builtin::Info BuiltinInfo[] = {
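The predefine follows directly from the pinned vscale range: when vscale_min == vscale_max == K, the macro expands to K * 64. A sketch (hypothetical invocation; the driver flag spelling follows the '-mrvv-vector-bits=' wording used by the new diagnostics):

    // $ clang --target=riscv64 -march=rv64gcv -mrvv-vector-bits=256 ...
    // => vscale is fixed at 256 / 64 = 4, so the compiler predefines
    //    __RISCV_RVV_VLEN_BITS as 256.
    #include <riscv_vector.h>

    #if defined(__RISCV_RVV_VLEN_BITS)
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));
    #else
    #error "vector length not fixed; riscv_rvv_vector_bits is unavailable"
    #endif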
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 
@@ -11108,6 +11109,8 @@
                                                   CharUnits Field1Off,
                                                   llvm::Type *Field2Ty,
                                                   CharUnits Field2Off) const;
+
+  ABIArgInfo coerceVLSVector(QualType Ty) const;
 };
 } // end anonymous namespace
 
@@ -11351,6 +11354,44 @@
   return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
 }
 
+// Fixed-length RVV vectors are represented as scalable vectors in function
+// args/return and must be coerced from fixed vectors.
+ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
+  assert(Ty->isVectorType() && "expected vector type!");
+
+  const auto *VT = Ty->castAs<VectorType>();
+  assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
+         "Unexpected vector kind");
+
+  assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
+
+  const auto *BT = VT->getElementType()->castAs<BuiltinType>();
+  llvm::ScalableVectorType *ResType = nullptr;
+  unsigned EltSize = getContext().getTypeSize(BT);
+  switch (BT->getKind()) {
+  default:
+    llvm_unreachable("unexpected builtin type for RISC-V vector!");
+  case BuiltinType::SChar:
+  case BuiltinType::UChar:
+  case BuiltinType::Short:
+  case BuiltinType::UShort:
+  case BuiltinType::Int:
+  case BuiltinType::UInt:
+  case BuiltinType::Long:
+  case BuiltinType::ULong:
+  case BuiltinType::LongLong:
+  case BuiltinType::ULongLong:
+  case BuiltinType::Half:
+  case BuiltinType::Float:
+  case BuiltinType::Double:
+    ResType =
+        llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
+                                      llvm::RISCV::RVVBitsPerBlock / EltSize);
+    break;
+  }
+  return ABIArgInfo::getDirect(ResType);
+}
+
 ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
                                               int &ArgGPRsLeft,
                                               int &ArgFPRsLeft) const {
@@ -11446,6 +11487,10 @@
     return ABIArgInfo::getDirect();
   }
 
+  if (const VectorType *VT = Ty->getAs<VectorType>())
+    if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+      return coerceVLSVector(Ty);
+
   // Aggregates which are <= 2*XLen will be passed in registers if possible,
   // so coerce to integers.
   if (Size <= 2 * XLen) {
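To see the coercion in effect, a sketch of the expected lowering (hypothetical example; assumes a 128-bit configuration, and the scalable element count follows from RVVBitsPerBlock / EltSize = 64 / 32 = 2):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    // In memory this type is a flat <4 x i32>, but at call boundaries it is
    // coerced to the scalable container, so it is passed and returned in
    // vector registers exactly like vint32m1_t, roughly:
    //   define <vscale x 2 x i32> @pass_through(<vscale x 2 x i32> %v.coerce)
    fixed_vint32m1_t pass_through(fixed_vint32m1_t v) { return v; }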
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2350,6 +2350,12 @@
     return TC_Success;
   }
 
+  // Allow bitcasting between RVV VLATs and VLSTs, and vice-versa.
+  if (Self.isValidRVVBitcast(SrcType, DestType)) {
+    Kind = CK_BitCast;
+    return TC_Success;
+  }
+
   // The non-vector type, if any, must have integral type.  This is
   // the same rule that C vector casts use; note, however, that enum
   // types are not integral in C++.
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -13994,6 +13994,13 @@
             QualType(Source, 0))))
       return;
 
+    if (Target->isRVVVLSBuiltinType() &&
+        (S.Context.areCompatibleRVVTypes(QualType(Target, 0),
+                                         QualType(Source, 0)) ||
+         S.Context.areLaxCompatibleRVVTypes(QualType(Target, 0),
+                                            QualType(Source, 0))))
+      return;
+
    if (!isa<VectorType>(Target)) {
      if (S.SourceMgr.isInSystemMacro(CC))
        return;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7977,6 +7977,28 @@
          ValidScalableConversion(destTy, srcTy);
 }
 
+/// Are the two types RVV-bitcast-compatible types? I.e. is bitcasting from the
+/// first RVV type (e.g. an RVV scalable type) to the second type (e.g. an RVV
+/// VLS type) allowed?
+///
+/// This will also return false if the two given types do not make sense from
+/// the perspective of RVV bitcasts.
+bool Sema::isValidRVVBitcast(QualType srcTy, QualType destTy) {
+  assert(srcTy->isVectorType() || destTy->isVectorType());
+
+  auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) {
+    if (!FirstType->isRVVSizelessBuiltinType())
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    return VecTy &&
+           VecTy->getVectorKind() == VectorType::RVVFixedLengthDataVector;
+  };
+
+  return ValidScalableConversion(srcTy, destTy) ||
+         ValidScalableConversion(destTy, srcTy);
+}
+
 /// Are the two types matrix types and do they have the same dimensions i.e.
 /// do they have the same number of rows and the same number of columns?
 bool Sema::areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy) {
@@ -9909,6 +9931,16 @@
     return Compatible;
   }
 
+  // Allow assignments between fixed-length and sizeless RVV vectors.
+  if ((LHSType->isRVVSizelessBuiltinType() && RHSType->isVectorType()) ||
+      (LHSType->isVectorType() && RHSType->isRVVSizelessBuiltinType())) {
+    if (Context.areCompatibleRVVTypes(LHSType, RHSType) ||
+        Context.areLaxCompatibleRVVTypes(LHSType, RHSType)) {
+      Kind = CK_BitCast;
+      return Compatible;
+    }
+  }
+
   return Incompatible;
 }
 
@@ -10765,18 +10797,30 @@
     }
   }
 
-  // Expressions containing fixed-length and sizeless SVE vectors are invalid
-  // since the ambiguity can affect the ABI.
-  auto IsSveConversion = [](QualType FirstType, QualType SecondType) {
+  // Expressions containing fixed-length and sizeless SVE/RVV vectors are
+  // invalid since the ambiguity can affect the ABI.
+  auto IsSveRVVConversion = [](QualType FirstType, QualType SecondType,
+                               unsigned &SVEorRVV) {
     const VectorType *VecType = SecondType->getAs<VectorType>();
-    return FirstType->isSizelessBuiltinType() && VecType &&
-           (VecType->getVectorKind() == VectorType::SveFixedLengthDataVector ||
-            VecType->getVectorKind() ==
-                VectorType::SveFixedLengthPredicateVector);
+    SVEorRVV = 0;
+    if (FirstType->isSizelessBuiltinType() && VecType) {
+      if (VecType->getVectorKind() == VectorType::SveFixedLengthDataVector ||
+          VecType->getVectorKind() ==
+              VectorType::SveFixedLengthPredicateVector)
+        return true;
+      if (VecType->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+        SVEorRVV = 1;
+        return true;
+      }
+    }
+
+    return false;
   };
 
-  if (IsSveConversion(LHSType, RHSType) || IsSveConversion(RHSType, LHSType)) {
-    Diag(Loc, diag::err_typecheck_sve_ambiguous) << LHSType << RHSType;
+  unsigned SVEorRVV;
+  if (IsSveRVVConversion(LHSType, RHSType, SVEorRVV) ||
+      IsSveRVVConversion(RHSType, LHSType, SVEorRVV)) {
+    Diag(Loc, diag::err_typecheck_sve_rvv_ambiguous)
+        << SVEorRVV << LHSType << RHSType;
     return QualType();
   }
 
@@ -10788,12 +10832,21 @@
     const VectorType *SecondVecType = SecondType->getAs<VectorType>();
 
     SVEorRVV = 0;
-    if (FirstVecType && SecondVecType)
-      return FirstVecType->getVectorKind() == VectorType::GenericVector &&
-             (SecondVecType->getVectorKind() ==
-                  VectorType::SveFixedLengthDataVector ||
-              SecondVecType->getVectorKind() ==
-                  VectorType::SveFixedLengthPredicateVector);
+    if (FirstVecType && SecondVecType) {
+      if (FirstVecType->getVectorKind() == VectorType::GenericVector) {
+        if (SecondVecType->getVectorKind() ==
+                VectorType::SveFixedLengthDataVector ||
+            SecondVecType->getVectorKind() ==
+                VectorType::SveFixedLengthPredicateVector)
+          return true;
+        if (SecondVecType->getVectorKind() ==
+                VectorType::RVVFixedLengthDataVector) {
+          SVEorRVV = 1;
+          return true;
+        }
+      }
+      return false;
+    }
 
     if (SecondVecType &&
         SecondVecType->getVectorKind() == VectorType::GenericVector) {
@@ -10808,7 +10861,6 @@
     return false;
   };
 
-  unsigned SVEorRVV;
   if (IsSveRVVGnuConversion(LHSType, RHSType, SVEorRVV) ||
       IsSveRVVGnuConversion(RHSType, LHSType, SVEorRVV)) {
     Diag(Loc, diag::err_typecheck_sve_rvv_gnu_ambiguous)
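Taken together, the Sema changes above give fixed/sizeless interop the following shape (sketch, not from the patch; assumes __RISCV_RVV_VLEN_BITS == 128):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    void ok(vint32m1_t s, fixed_vint32m1_t f) {
      f = s;             // implicit conversion, lowered as CK_BitCast
      s = (vint32m1_t)f; // explicit cast accepted via isValidRVVBitcast
    }

    vint32m1_t bad(vint32m1_t s, fixed_vint32m1_t f) {
      return s + f; // error: cannot combine fixed-length and sizeless RVV
                    // vectors in expression, result is ambiguous
    }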
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -4571,6 +4571,7 @@
     break;
 
   case ICK_SVE_Vector_Conversion:
+  case ICK_RVV_Vector_Conversion:
     From = ImpCastExprToType(From, ToType, CK_BitCast, VK_PRValue,
                              /*BasePath=*/nullptr, CCK)
                .get();
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -141,6 +141,7 @@
     ICR_Conversion,
     ICR_Conversion,
     ICR_Conversion,
+    ICR_Conversion,
     ICR_OCL_Scalar_Widening,
     ICR_Complex_Real_Conversion,
     ICR_Conversion,
@@ -183,6 +184,7 @@
     "Derived-to-base conversion",
     "Vector conversion",
     "SVE Vector conversion",
+    "RVV Vector conversion",
     "Vector splat",
     "Complex-real conversion",
     "Block Pointer conversion",
@@ -1761,6 +1763,14 @@
       return true;
     }
 
+  if (ToType->isRVVSizelessBuiltinType() ||
+      FromType->isRVVSizelessBuiltinType())
+    if (S.Context.areCompatibleRVVTypes(FromType, ToType) ||
+        S.Context.areLaxCompatibleRVVTypes(FromType, ToType)) {
+      ICK = ICK_RVV_Vector_Conversion;
+      return true;
+    }
+
   // We can perform the conversion between vector types in the following cases:
   // 1)vector types are equivalent AltiVec and GCC vector types
   // 2)lax vector conversions are permitted and the vector types are of the
@@ -4327,6 +4337,20 @@
                : ImplicitConversionSequence::Worse;
   }
 
+  if (SCS1.Second == ICK_RVV_Vector_Conversion &&
+      SCS2.Second == ICK_RVV_Vector_Conversion) {
+    bool SCS1IsCompatibleRVVVectorConversion =
+        S.Context.areCompatibleRVVTypes(SCS1.getFromType(), SCS1.getToType(2));
+    bool SCS2IsCompatibleRVVVectorConversion =
+        S.Context.areCompatibleRVVTypes(SCS2.getFromType(), SCS2.getToType(2));
+
+    if (SCS1IsCompatibleRVVVectorConversion !=
+        SCS2IsCompatibleRVVVectorConversion)
+      return SCS1IsCompatibleRVVVectorConversion
+                 ? ImplicitConversionSequence::Better
+                 : ImplicitConversionSequence::Worse;
+  }
+
   return ImplicitConversionSequence::Indistinguishable;
 }
 
@@ -5765,6 +5789,7 @@
   case ICK_Derived_To_Base:
   case ICK_Vector_Conversion:
   case ICK_SVE_Vector_Conversion:
+  case ICK_RVV_Vector_Conversion:
   case ICK_Vector_Splat:
   case ICK_Complex_Real:
   case ICK_Block_Pointer_Conversion:
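In C++, the new conversion kind ranks as an ordinary conversion (ICR_Conversion), and the tie-break above prefers an exactly compatible RVV conversion over a merely lax-compatible one. A sketch (hypothetical C++ example; assumes __RISCV_RVV_VLEN_BITS == 128 and the default lax integer vector conversions):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));
    typedef vuint32m1_t fixed_vuint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    void g(fixed_vint32m1_t);   // #1: compatible with vint32m1_t
    void g(fixed_vuint32m1_t);  // #2: only lax-compatible with vint32m1_t

    void h(vint32m1_t s) {
      g(s); // calls #1: both candidates use ICK_RVV_Vector_Conversion, and the
            // areCompatibleRVVTypes sequence is ranked Better than the lax one
    }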
+/// HandleRISCVRVVVectorBitsTypeAttr - The "riscv_rvv_vector_bits" attribute is
+/// used to create fixed-length versions of sizeless RVV types such as
+/// vint8m1_t.
+static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
+                                             ParsedAttr &Attr, Sema &S) {
+  // Target must have vector extension.
+  if (!S.Context.getTargetInfo().hasFeature("zve32x")) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
+        << Attr << "'zve32x'";
+    Attr.setInvalid();
+    return;
+  }
+
+  auto VScale = S.Context.getTargetInfo().getVScaleRange(S.getLangOpts());
+  if (!VScale || !VScale->first || VScale->first != VScale->second) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_riscv_rvv_bits_unsupported)
+        << Attr;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Check the attribute arguments.
+  if (Attr.getNumArgs() != 1) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
+        << Attr << 1;
+    Attr.setInvalid();
+    return;
+  }
+
+  // The vector size must be an integer constant expression.
+  llvm::APSInt RVVVectorSizeInBits(32);
+  if (!verifyValidIntegerConstantExpr(S, Attr, RVVVectorSizeInBits))
+    return;
+
+  unsigned VecSize = static_cast<unsigned>(RVVVectorSizeInBits.getZExtValue());
+
+  // The attribute vector size must match -mrvv-vector-bits.
+  // FIXME: Add support for types with LMUL!=1. Need to make sure size passed
+  // to attribute is equal to LMUL*VScaleMin*RVVBitsPerBlock.
+  if (VecSize != VScale->first * llvm::RISCV::RVVBitsPerBlock) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size)
+        << VecSize << VScale->first * llvm::RISCV::RVVBitsPerBlock;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Attribute can only be attached to a single RVV vector type.
+  if (!CurType->isRVVVLSBuiltinType()) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_rvv_type)
+        << Attr << CurType;
+    Attr.setInvalid();
+    return;
+  }
+
+  QualType EltType = CurType->getRVVEltType(S.Context);
+  unsigned TypeSize = S.Context.getTypeSize(EltType);
+  VectorType::VectorKind VecKind = VectorType::RVVFixedLengthDataVector;
+  VecSize /= TypeSize;
+  CurType = S.Context.getVectorType(EltType, VecSize, VecKind);
+}
+
 /// Handle OpenCL Access Qualifier Attribute.
static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr, Sema &S) { @@ -8542,6 +8604,10 @@ attr.setUsedAsTypeAttr(); break; } + case ParsedAttr::AT_RISCVRVVVectorBits: + HandleRISCVRVVVectorBitsTypeAttr(type, attr, state.getSema()); + attr.setUsedAsTypeAttr(); + break; case ParsedAttr::AT_OpenCLAccess: HandleOpenCLAccessAttr(type, attr, state.getSema()); attr.setUsedAsTypeAttr(); diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +#define DEFINE_STRUCT(ty) \ + struct struct_##ty { \ + fixed_##ty##_t x, y[3]; \ + } struct_##ty; + +DEFINE_STRUCT(int64m1) +DEFINE_STRUCT(float64m1) + +//===----------------------------------------------------------------------===// +// int64 +//===----------------------------------------------------------------------===// + +// CHECK-64-LABEL: @read_int64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-64-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( undef, <1 x i64> [[TMP0]], i64 0) +// CHECK-64-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-128-LABEL: @read_int64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v2i64( undef, <2 x i64> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-256-LABEL: @read_int64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = tail call 
@llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[TMP0]], i64 0) +// CHECK-256-NEXT: ret [[CASTSCALABLESVE]] +// +vint64m1_t read_int64m1(struct struct_int64m1 *s) { + return s->y[0]; +} + +// CHECK-64-LABEL: @write_int64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: store <1 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: ret void +// +// CHECK-128-LABEL: @write_int64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_int64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: ret void +// +void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { + s->y[0] = x; +} + +//===----------------------------------------------------------------------===// +// float64 +//===----------------------------------------------------------------------===// + +// CHECK-64-LABEL: @read_float64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v1f64( undef, <1 x double> [[TMP0]], i64 0) +// CHECK-64-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-128-LABEL: @read_float64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v2f64( undef, <2 x double> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-256-LABEL: @read_float64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[TMP0]], i64 0) +// CHECK-256-NEXT: ret [[CASTSCALABLESVE]] +// +vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { + return s->y[0]; +} + +// CHECK-64-LABEL: @write_float64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: store <1 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: ret void +// +// 
CHECK-128-LABEL: @write_float64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_float64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: ret void +// +void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { + s->y[0] = x; +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c @@ -0,0 +1,117 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +//===----------------------------------------------------------------------===// +// Test caller/callee with VLST <-> VLAT +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @sizeless_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X:%.*]] +// +vint32m1_t sizeless_callee(vint32m1_t x) { + return x; +} + +// CHECK-LABEL: @fixed_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X_COERCE:%.*]] +// +fixed_int32m1_t fixed_caller(fixed_int32m1_t x) { + return sizeless_callee(x); +} + +// CHECK-LABEL: @fixed_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X_COERCE:%.*]] +// +fixed_int32m1_t fixed_callee(fixed_int32m1_t x) { + return x; +} + +// CHECK-LABEL: @sizeless_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COERCE1:%.*]] = alloca <8 x i32>, align 8 +// CHECK-NEXT: store [[X:%.*]], ptr [[COERCE1]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[COERCE1]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[CASTSCALABLESVE2:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TMP0]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE2]] +// +vint32m1_t sizeless_caller(vint32m1_t x) { + return fixed_callee(x); +} + +//===----------------------------------------------------------------------===// +// fixed, fixed +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_ff(fixed_int32m1_t op1, fixed_int32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} + +//===----------------------------------------------------------------------===// +// fixed, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_fs(fixed_int32m1_t op1, vint32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} + +//===----------------------------------------------------------------------===// +// scalable, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1:%.*]], [[OP2:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_ss(vint32m1_t op1, vint32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1:%.*]], [[OP2:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c @@ -0,0 +1,110 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef int32_t gnu_int32m1_t __attribute__((vector_size(N / 8))); + +// CHECK-LABEL: @to_vint32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// +vint32m1_t to_vint32m1_t(fixed_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vint32m1_t( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: ret [[TYPE:%.*]] +// +fixed_int32m1_t from_vint32m1_t(vint32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_vfloat64m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// +vfloat64m1_t to_vfloat64m1_t(fixed_float64m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vfloat64m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE:%.*]] +// +fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) { + return type; +} + +// CHECK-LABEL: @lax_cast( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret [[TMP0]] +// +vint64m1_t lax_cast(fixed_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret void +// +gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret void +// +gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { + return type; +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c @@ -0,0 +1,79 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t 
vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+fixed_int32m1_t global_vec;
+
+// CHECK-LABEL: @test_ptr_to_global(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[GLOBAL_VEC_PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr @global_vec, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 8
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t test_ptr_to_global() {
+  fixed_int32m1_t *global_vec_ptr;
+  global_vec_ptr = &global_vec;
+  return *global_vec_ptr;
+}
+
+//
+// Test casting pointer from fixed-length array to scalable vector.
+// CHECK-LABEL: @array_arg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[ARR:%.*]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <8 x i32>, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
+  return arr[0];
+}
+
+// CHECK-LABEL: @test_cast(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 2 x i32>, align 4
+// CHECK-NEXT: store <vscale x 2 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr @global_vec, align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i32>, ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[CASTSCALABLESVE]], <vscale x 2 x i32> [[TMP1]], i64 8)
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: store <8 x i32> [[CASTFIXEDSVE]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE1]]
+//
+fixed_int32m1_t test_cast(vint32m1_t vec) {
+  return __riscv_vadd(global_vec, vec, N/32);
+}
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
@@ -0,0 +1,60 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+fixed_int64m1_t global_i64;
+
+//===----------------------------------------------------------------------===//
+// WRITES
+//===----------------------------------------------------------------------===//
+
+// CHECK-128-LABEL: @write_global_i64(
+// CHECK-128-NEXT: entry:
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
+// CHECK-128-NEXT: store <1 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 8, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-128-NEXT: ret void
+//
+// CHECK-512-LABEL: @write_global_i64(
+// CHECK-512-NEXT: entry:
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
+// CHECK-512-NEXT: store <4 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 8, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-512-NEXT: ret void
+//
+void write_global_i64(vint64m1_t v) { global_i64 = v; }
+
+//===----------------------------------------------------------------------===//
+// READS
+//===----------------------------------------------------------------------===//
+
+// CHECK-128-LABEL: @read_global_i64(
+// CHECK-128-NEXT: entry:
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v1i64(<vscale x 1 x i64> undef, <1 x i64> [[TMP0]], i64 0)
+// CHECK-128-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+// CHECK-512-LABEL: @read_global_i64(
+// CHECK-512-NEXT: entry:
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA4]]
+// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[TMP0]], i64 0)
+// CHECK-512-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+vint64m1_t read_global_i64() { return global_i64; }
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
@@ -0,0 +1,485 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+// Define valid fixed-width RVV types
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+//===----------------------------------------------------------------------===//
+// Structs and unions
+//===----------------------------------------------------------------------===//
+#define DEFINE_STRUCT(ty) \
+  struct struct_##ty {    \
+    fixed_##ty##_t x;     \
+  } struct_##ty;
+
+#define DEFINE_UNION(ty) \
+  union union_##ty {     \
+    fixed_##ty##_t x;    \
+  } union_##ty;
+
+DEFINE_STRUCT(int8m1)
+DEFINE_STRUCT(int16m1)
+DEFINE_STRUCT(int32m1)
+DEFINE_STRUCT(int64m1)
+DEFINE_STRUCT(uint8m1)
+DEFINE_STRUCT(uint16m1)
+DEFINE_STRUCT(uint32m1)
+DEFINE_STRUCT(uint64m1)
+DEFINE_STRUCT(float32m1)
+DEFINE_STRUCT(float64m1)
+
+DEFINE_UNION(int8m1)
+DEFINE_UNION(int16m1)
+DEFINE_UNION(int32m1)
+DEFINE_UNION(int64m1)
+DEFINE_UNION(uint8m1)
+DEFINE_UNION(uint16m1)
+DEFINE_UNION(uint32m1)
+DEFINE_UNION(uint64m1)
+DEFINE_UNION(float32m1)
+DEFINE_UNION(float64m1)
+
+//===----------------------------------------------------------------------===//
+// Global variables
+//===----------------------------------------------------------------------===//
+fixed_int8m1_t global_i8;
+fixed_int16m1_t global_i16;
+fixed_int32m1_t global_i32;
+fixed_int64m1_t global_i64;
+
+fixed_uint8m1_t global_u8;
+fixed_uint16m1_t global_u16;
+fixed_uint32m1_t global_u32;
+fixed_uint64m1_t global_u64;
+
+fixed_float32m1_t global_f32;
+fixed_float64m1_t global_f64;
+
+//===----------------------------------------------------------------------===//
+// Global arrays
+//===----------------------------------------------------------------------===//
+fixed_int8m1_t global_arr_i8[3];
+fixed_int16m1_t global_arr_i16[3];
+fixed_int32m1_t global_arr_i32[3];
+fixed_int64m1_t global_arr_i64[3];
+
+fixed_uint8m1_t global_arr_u8[3];
+fixed_uint16m1_t global_arr_u16[3];
+fixed_uint32m1_t global_arr_u32[3];
+fixed_uint64m1_t global_arr_u64[3];
+
+fixed_float32m1_t global_arr_f32[3];
+fixed_float64m1_t global_arr_f64[3];
+
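A minimal usage sketch of the pattern these declarations exercise, assuming a build with -mrvv-vector-bits=256 (the accumulate/acc names and the 256-bit guard are illustrative, not part of this patch); the intrinsic call mirrors test_cast earlier in this diff:

    #include <riscv_vector.h>

    #if defined(__RISCV_RVV_VLEN_BITS) && __RISCV_RVV_VLEN_BITS == 256
    // Fixed-length alias: unlike the sizeless vint32m1_t it is derived
    // from, it may appear in structs, unions, arrays, and globals.
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));

    static fixed_vint32m1_t acc; // global storage of a vector value

    // Fixed-length and sizeless values convert implicitly, so the
    // fixed-length global can be passed straight to a scalable intrinsic.
    void accumulate(vint32m1_t v) {
      acc = __riscv_vadd(acc, v, __RISCV_RVV_VLEN_BITS / 32);
    }
    #endif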
+//===----------------------------------------------------------------------===//
+// Locals
+//===----------------------------------------------------------------------===//
+void f() {
+  // Variables
+  fixed_int8m1_t local_i8;
+  fixed_int16m1_t local_i16;
+  fixed_int32m1_t local_i32;
+  fixed_int64m1_t local_i64;
+  fixed_uint8m1_t local_u8;
+  fixed_uint16m1_t local_u16;
+  fixed_uint32m1_t local_u32;
+  fixed_uint64m1_t local_u64;
+  fixed_float32m1_t local_f32;
+  fixed_float64m1_t local_f64;
+
+  // Arrays
+  fixed_int8m1_t local_arr_i8[3];
+  fixed_int16m1_t local_arr_i16[3];
+  fixed_int32m1_t local_arr_i32[3];
+  fixed_int64m1_t local_arr_i64[3];
+  fixed_uint8m1_t local_arr_u8[3];
+  fixed_uint16m1_t local_arr_u16[3];
+  fixed_uint32m1_t local_arr_u32[3];
+  fixed_uint64m1_t local_arr_u64[3];
+  fixed_float32m1_t local_arr_f32[3];
+  fixed_float64m1_t local_arr_f64[3];
+}
+
+//===----------------------------------------------------------------------===//
+// Structs and unions
+//===----------------------------------------------------------------------===//
+// CHECK-64: %struct.struct_int8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %struct.struct_int16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %struct.struct_int32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %struct.struct_int64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %struct.struct_uint8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %struct.struct_uint16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %struct.struct_uint32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %struct.struct_uint64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %struct.struct_float32m1 = type { <2 x float> }
+// CHECK-64-NEXT: %struct.struct_float64m1 = type { <1 x double> }
+
+// CHECK-128: %struct.struct_int8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %struct.struct_int16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %struct.struct_int32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %struct.struct_int64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %struct.struct_uint8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %struct.struct_uint16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %struct.struct_uint32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %struct.struct_uint64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %struct.struct_float32m1 = type { <4 x float> }
+// CHECK-128-NEXT: %struct.struct_float64m1 = type { <2 x double> }
+
+// CHECK-256: %struct.struct_int8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %struct.struct_int16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %struct.struct_int32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %struct.struct_int64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %struct.struct_uint8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %struct.struct_uint16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %struct.struct_uint32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %struct.struct_uint64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %struct.struct_float32m1 = type { <8 x float> }
+// CHECK-256-NEXT: %struct.struct_float64m1 = type { <4 x double> }
+
+// CHECK-512: %struct.struct_int8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %struct.struct_int16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %struct.struct_int32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %struct.struct_int64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %struct.struct_uint8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %struct.struct_uint16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %struct.struct_uint32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %struct.struct_uint64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %struct.struct_float32m1 = type { <16 x float> }
+// CHECK-512-NEXT: %struct.struct_float64m1 = type { <8 x double> }
+
+// CHECK-1024: %struct.struct_int8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %struct.struct_int16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %struct.struct_int32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %struct.struct_int64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %struct.struct_uint8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %struct.struct_uint16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %struct.struct_uint32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %struct.struct_uint64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %struct.struct_float32m1 = type { <32 x float> }
+// CHECK-1024-NEXT: %struct.struct_float64m1 = type { <16 x double> }
+
+// CHECK-64: %union.union_int8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %union.union_int16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %union.union_int32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %union.union_int64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %union.union_uint8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %union.union_uint16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %union.union_uint32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %union.union_uint64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %union.union_float32m1 = type { <2 x float> }
+// CHECK-64-NEXT: %union.union_float64m1 = type { <1 x double> }
+
+// CHECK-128: %union.union_int8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %union.union_int16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %union.union_int32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %union.union_int64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %union.union_uint8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %union.union_uint16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %union.union_uint32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %union.union_uint64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %union.union_float32m1 = type { <4 x float> }
+// CHECK-128-NEXT: %union.union_float64m1 = type { <2 x double> }
+
+// CHECK-256: %union.union_int8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %union.union_int16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %union.union_int32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %union.union_int64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %union.union_uint8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %union.union_uint16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %union.union_uint32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %union.union_uint64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %union.union_float32m1 = type { <8 x float> }
+// CHECK-256-NEXT: %union.union_float64m1 = type { <4 x double> }
+
+// CHECK-512: %union.union_int8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %union.union_int16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %union.union_int32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %union.union_int64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %union.union_uint8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %union.union_uint16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %union.union_uint32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %union.union_uint64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %union.union_float32m1 = type { <16 x float> }
+// CHECK-512-NEXT: %union.union_float64m1 = type { <8 x double> }
+
+// CHECK-1024: %union.union_int8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %union.union_int16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %union.union_int32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %union.union_int64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %union.union_uint8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %union.union_uint16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %union.union_uint32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %union.union_uint64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %union.union_float32m1 = type { <32 x float> }
+// CHECK-1024-NEXT: %union.union_float64m1 = type { <16 x double> }
+
+//===----------------------------------------------------------------------===//
+// Global variables
+//===----------------------------------------------------------------------===//
+// CHECK-64: @global_i8 ={{.*}} global <8 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i16 ={{.*}} global <4 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i32 ={{.*}} global <2 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i64 ={{.*}} global <1 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u8 ={{.*}} global <8 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u16 ={{.*}} global <4 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u32 ={{.*}} global <2 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u64 ={{.*}} global <1 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f32 ={{.*}} global <2 x float> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f64 ={{.*}} global <1 x double> zeroinitializer, align 8
+
+// CHECK-128: @global_i8 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i16 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i32 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i64 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u8 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u16 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u32 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u64 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f32 ={{.*}} global <4 x float> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f64 ={{.*}} global <2 x double> zeroinitializer, align 8
+
+// CHECK-256: @global_i8 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i16 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i32 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i64 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u8 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u16 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u32 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u64 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f32 ={{.*}} global <8 x float> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f64 ={{.*}} global <4 x double> zeroinitializer, align 8
+
+// CHECK-512: @global_i8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i16 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u16 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f32 ={{.*}} global <16 x float> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f64 ={{.*}} global <8 x double> zeroinitializer, align 8
+
+// CHECK-1024: @global_i8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i16 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i32 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i64 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u16 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u32 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u64 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f32 ={{.*}} global <32 x float> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f64 ={{.*}} global <16 x double> zeroinitializer, align 8
+
+//===----------------------------------------------------------------------===//
+// Global arrays
+//===----------------------------------------------------------------------===//
+// CHECK-64: @global_arr_i8 ={{.*}} global [3 x <8 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i16 ={{.*}} global [3 x <4 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i32 ={{.*}} global [3 x <2 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i64 ={{.*}} global [3 x <1 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u8 ={{.*}} global [3 x <8 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u16 ={{.*}} global [3 x <4 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u32 ={{.*}} global [3 x <2 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u64 ={{.*}} global [3 x <1 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f32 ={{.*}} global [3 x <2 x float>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f64 ={{.*}} global [3 x <1 x double>] zeroinitializer, align 8
+
+// CHECK-128: @global_arr_i8 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i16 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i32 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i64 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u8 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u16 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u32 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u64 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f32 ={{.*}} global [3 x <4 x float>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f64 ={{.*}} global [3 x <2 x double>] zeroinitializer, align 8
+
+// CHECK-256: @global_arr_i8 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i16 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i32 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i64 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u8 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u16 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u32 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u64 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f32 ={{.*}} global [3 x <8 x float>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f64 ={{.*}} global [3 x <4 x double>] zeroinitializer, align 8
+
+// CHECK-512: @global_arr_i8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i16 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i32 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i64 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u16 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u32 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u64 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f32 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f64 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
+
+// CHECK-1024: @global_arr_i8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i16 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i32 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i64 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u16 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u32 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u64 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f32 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f64 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
+
+//===----------------------------------------------------------------------===//
+// Local variables
+//===----------------------------------------------------------------------===//
+// CHECK-64: %local_i8 = alloca <8 x i8>, align 8
+// CHECK-64-NEXT: %local_i16 = alloca <4 x i16>, align 8
+// CHECK-64-NEXT: %local_i32 = alloca <2 x i32>, align 8
+// CHECK-64-NEXT: %local_i64 = alloca <1 x i64>, align 8
+// CHECK-64-NEXT: %local_u8 = alloca <8 x i8>, align 8
+// CHECK-64-NEXT: %local_u16 = alloca <4 x i16>, align 8
+// CHECK-64-NEXT: %local_u32 = alloca <2 x i32>, align 8
+// CHECK-64-NEXT: %local_u64 = alloca <1 x i64>, align 8
+// CHECK-64-NEXT: %local_f32 = alloca <2 x float>, align 8
+// CHECK-64-NEXT: %local_f64 = alloca <1 x double>, align 8
+
+// CHECK-128: %local_i8 = alloca <16 x i8>, align 8
+// CHECK-128-NEXT: %local_i16 = alloca <8 x i16>, align 8
+// CHECK-128-NEXT: %local_i32 = alloca <4 x i32>, align 8
+// CHECK-128-NEXT: %local_i64 = alloca <2 x i64>, align 8
+// CHECK-128-NEXT: %local_u8 = alloca <16 x i8>, align 8
+// CHECK-128-NEXT: %local_u16 = alloca <8 x i16>, align 8
+// CHECK-128-NEXT: %local_u32 = alloca <4 x i32>, align 8
+// CHECK-128-NEXT: %local_u64 = alloca <2 x i64>, align 8
+// CHECK-128-NEXT: %local_f32 = alloca <4 x float>, align 8
+// CHECK-128-NEXT: %local_f64 = alloca <2 x double>, align 8
+
+// CHECK-256: %local_i8 = alloca <32 x i8>, align 8
+// CHECK-256-NEXT: %local_i16 = alloca <16 x i16>, align 8
+// CHECK-256-NEXT: %local_i32 = alloca <8 x i32>, align 8
+// CHECK-256-NEXT: %local_i64 = alloca <4 x i64>, align 8
+// CHECK-256-NEXT: %local_u8 = alloca <32 x i8>, align 8
+// CHECK-256-NEXT: %local_u16 = alloca <16 x i16>, align 8
+// CHECK-256-NEXT: %local_u32 = alloca <8 x i32>, align 8
+// CHECK-256-NEXT: %local_u64 = alloca <4 x i64>, align 8
+// CHECK-256-NEXT: %local_f32 = alloca <8 x float>, align 8
+// CHECK-256-NEXT: %local_f64 = alloca <4 x double>, align 8
+
+// CHECK-512: %local_i8 = alloca <64 x i8>, align 8
+// CHECK-512-NEXT: %local_i16 = alloca <32 x i16>, align 8
+// CHECK-512-NEXT: %local_i32 = alloca <16 x i32>, align 8
+// CHECK-512-NEXT: %local_i64 = alloca <8 x i64>, align 8
+// CHECK-512-NEXT: %local_u8 = alloca <64 x i8>, align 8
+// CHECK-512-NEXT: %local_u16 = alloca <32 x i16>, align 8
+// CHECK-512-NEXT: %local_u32 = alloca <16 x i32>, align 8
+// CHECK-512-NEXT: %local_u64 = alloca <8 x i64>, align 8
+// CHECK-512-NEXT: %local_f32 = alloca <16 x float>, align 8
+// CHECK-512-NEXT: %local_f64 = alloca <8 x double>, align 8
+
+// CHECK-1024: %local_i8 = alloca <128 x i8>, align 8
+// CHECK-1024-NEXT: %local_i16 = alloca <64 x i16>, align 8
+// CHECK-1024-NEXT: %local_i32 = alloca <32 x i32>, align 8
+// CHECK-1024-NEXT: %local_i64 = alloca <16 x i64>, align 8
+// CHECK-1024-NEXT: %local_u8 = alloca <128 x i8>, align 8
+// CHECK-1024-NEXT: %local_u16 = alloca <64 x i16>, align 8
+// CHECK-1024-NEXT: %local_u32 = alloca <32 x i32>, align 8
+// CHECK-1024-NEXT: %local_u64 = alloca <16 x i64>, align 8
+// CHECK-1024-NEXT: %local_f32 = alloca <32 x float>, align 8
+// CHECK-1024-NEXT: %local_f64 = alloca <16 x double>, align 8
+
+//===----------------------------------------------------------------------===//
+// Local arrays
+//===----------------------------------------------------------------------===//
+// CHECK-64: %local_arr_i8 = alloca [3 x <8 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_i16 = alloca [3 x <4 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_i32 = alloca [3 x <2 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_i64 = alloca [3 x <1 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_u8 = alloca [3 x <8 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_u16 = alloca [3 x <4 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_u32 = alloca [3 x <2 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_u64 = alloca [3 x <1 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_f32 = alloca [3 x <2 x float>], align 8
+// CHECK-64-NEXT: %local_arr_f64 = alloca [3 x <1 x double>], align 8
+
+// CHECK-128: %local_arr_i8 = alloca [3 x <16 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_i16 = alloca [3 x <8 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_i32 = alloca [3 x <4 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_i64 = alloca [3 x <2 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_u8 = alloca [3 x <16 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_u16 = alloca [3 x <8 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_u32 = alloca [3 x <4 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_u64 = alloca [3 x <2 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_f32 = alloca [3 x <4 x float>], align 8
+// CHECK-128-NEXT: %local_arr_f64 = alloca [3 x <2 x double>], align 8
+
+// CHECK-256: %local_arr_i8 = alloca [3 x <32 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_i16 = alloca [3 x <16 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_i32 = alloca [3 x <8 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_i64 = alloca [3 x <4 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_u8 = alloca [3 x <32 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_u16 = alloca [3 x <16 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_u32 = alloca [3 x <8 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_u64 = alloca [3 x <4 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_f32 = alloca [3 x <8 x float>], align 8
+// CHECK-256-NEXT: %local_arr_f64 = alloca [3 x <4 x double>], align 8
+
+// CHECK-512: %local_arr_i8 = alloca [3 x <64 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_i16 = alloca [3 x <32 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_i32 = alloca [3 x <16 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_i64 = alloca [3 x <8 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_u8 = alloca [3 x <64 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_u16 = alloca [3 x <32 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_u32 = alloca [3 x <16 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_u64 = alloca [3 x <8 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_f32 = alloca [3 x <16 x float>], align 8
+// CHECK-512-NEXT: %local_arr_f64 = alloca [3 x <8 x double>], align 8
+
+// CHECK-1024: %local_arr_i8 = alloca [3 x <128 x i8>], align 8
+// CHECK-1024-NEXT: %local_arr_i16 = alloca [3 x <64 x i16>], align 8
+// CHECK-1024-NEXT: %local_arr_i32 = alloca [3 x <32 x i32>], align 8
+// CHECK-1024-NEXT: %local_arr_i64 = alloca [3 x <16 x i64>], align 8
+// CHECK-1024-NEXT: %local_arr_u8 = alloca [3 x <128 x i8>], align 8
+// CHECK-1024-NEXT: %local_arr_u16 = alloca [3 x <64 x i16>], align 8
+// CHECK-1024-NEXT: %local_arr_u32 = alloca [3 x <32 x i32>], align 8
+// CHECK-1024-NEXT: %local_arr_u64 = alloca [3 x <16 x i64>], align 8
+// CHECK-1024-NEXT: %local_arr_f32 = alloca [3 x <32 x float>], align 8
+// CHECK-1024-NEXT: %local_arr_f64 = alloca [3 x <16 x double>], align 8
+
+//===----------------------------------------------------------------------===//
+// ILP32 ABI
+//===----------------------------------------------------------------------===//
+// CHECK-ILP32: @global_i32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-ILP32: @global_i64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-ILP32: @global_u32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-ILP32: @global_u64 ={{.*}} global <8 x i64> zeroinitializer, align 8
diff --git a/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c b/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c
@@ -0,0 +1,1821 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// ADDITION
+
+// CHECK-LABEL: @add_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_inplace_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_scalar_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_scalar_i8(fixed_int8m1_t a, int8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_scalar_f32(fixed_float32m1_t a, float b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_scalar_f64(fixed_float64m1_t a, double b) {
+  return a + b;
+}
+
+// SUBTRACTION
+
+// CHECK-LABEL: @sub_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_scalar_i8(fixed_int8m1_t a, int8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_scalar_f32(fixed_float32m1_t a, float b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_scalar_f64(fixed_float64m1_t a, double b) {
+  return a - b;
+}
+
+// MULTIPLICATION
+
+// CHECK-LABEL: @mul_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t mul_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t mul_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a * b;
+}
+
+//
CHECK-LABEL: @mul_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = 
call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t mul_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t mul_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: 
[[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> 
[[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t mul_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t mul_scalar_i16(fixed_int16m1_t a, int16_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_scalar_i32(fixed_int32m1_t a, int32_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_scalar_i64(fixed_int64m1_t a, int64_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_scalar_u8(fixed_uint8m1_t a, uint8_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> 
zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_scalar_u16(fixed_uint16m1_t a, uint16_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_scalar_u32(fixed_uint32m1_t a, uint32_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_scalar_u64(fixed_uint64m1_t a, uint64_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_scalar_f32(fixed_float32m1_t a, float b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_scalar_f64(fixed_float64m1_t a, double b) { + return a * b; +} + +// DIVISION + +// CHECK-LABEL: @div_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call 
@llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + 
return a / b; +} + +// CHECK-LABEL: @div_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = 
call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( 
[[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_scalar_i16(fixed_int16m1_t a, int16_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_scalar_i32(fixed_int32m1_t a, int32_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_scalar_i64(fixed_int64m1_t a, int64_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = 
shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_scalar_u8(fixed_uint8m1_t a, uint8_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_scalar_u16(fixed_uint16m1_t a, uint16_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_scalar_u32(fixed_uint32m1_t a, uint32_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_scalar_u64(fixed_uint64m1_t a, uint64_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_scalar_f32(fixed_float32m1_t a, float b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 
x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_scalar_f64(fixed_float64m1_t a, double b) { + return a / b; +} + +// REMAINDER + +// CHECK-LABEL: @rem_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t rem_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t rem_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t rem_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t rem_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: 
[[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t rem_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t rem_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t rem_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t rem_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t rem_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret 
[[CASTSCALABLESVE]] +// +fixed_int64m1_t rem_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t rem_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t rem_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t rem_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t rem_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[REM:%.*]] = 
srem <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rem_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rem_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rem_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rem_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rem_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rem_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rem_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a % b;
+}
diff --git a/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c b/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c
@@ -0,0 +1,419 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// AND
+
+// CHECK-LABEL: @and_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t and_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t and_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t and_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t and_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t and_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t and_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t and_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t and_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a & b;
+}
+
+// OR
+
+// CHECK-LABEL: @or_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t or_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t or_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t or_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t or_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t or_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t or_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a | b;
+}
+
+// 
CHECK-LABEL: @or_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[OR:%.*]] = or <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[OR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t or_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a | b; +} + +// CHECK-LABEL: @or_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[OR:%.*]] = or <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[OR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t or_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a | b; +} + +// XOR + +// CHECK-LABEL: @xor_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t xor_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t xor_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t xor_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t xor_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x 
i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_uint8m1_t xor_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_uint16m1_t xor_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_uint32m1_t xor_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a ^ b; +} + +// NOT + +// CHECK-LABEL: @not_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_int8m1_t not_i8(fixed_int8m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_int16m1_t not_i16(fixed_int16m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t not_i32(fixed_int32m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t not_i64(fixed_int64m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_uint8m1_t not_u8(fixed_uint8m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_uint16m1_t not_u16(fixed_uint16m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_uint32m1_t not_u32(fixed_uint32m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_uint64m1_t not_u64(fixed_uint64m1_t a) { + return ~a; +} diff --git a/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c b/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c @@ -0,0 +1,827 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \ +// RUN: -target-feature +f -target-feature +d -disable-O0-optnone \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \ +// RUN: opt -S -passes=sroa | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include <stdint.h> + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint64m1_t
fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +// EQ + +// CHECK-LABEL: @eq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t eq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t eq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t eq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// 
CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t eq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a == b; +} + +// NEQ + +// CHECK-LABEL: @neq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: 
ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t neq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t neq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t neq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t neq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// 
CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp une <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp une <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a != b; +} + +// LT + +// CHECK-LABEL: @lt_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t lt_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t lt_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return 
a < b; +} + +// CHECK-LABEL: @lt_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t lt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t lt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> 
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a < b; +} + +// LEQ + +// CHECK-LABEL: @leq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t leq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t leq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> 
@llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t leq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t leq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call 
@llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a <= b; +} + +// GT + +// CHECK-LABEL: @gt_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t gt_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t gt_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t gt_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: 
[[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t gt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t gt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t gt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] 
+// +fixed_int64m1_t gt_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a > b; +} + +// GEQ + +// CHECK-LABEL: @geq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t geq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t geq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t geq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] 
= icmp uge <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_int16m1_t geq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a >= b; +} diff --git a/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c b/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c @@ -0,0 +1,659 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \ +// RUN: -target-feature +f -target-feature +d -disable-O0-optnone \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \ +// RUN: opt -S -passes=sroa | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include <stdint.h> + +#define N __RISCV_RVV_VLEN_BITS + +typedef
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// CHECK-LABEL: @lshift_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
+ return b >> a;
+}
diff --git a/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c b/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c
@@ -0,0 +1,136 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// CHECK-LABEL: @subscript_int8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <32 x i8> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i8 [[VECEXT]]
+//
+int8_t subscript_int8(fixed_int8m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <32 x i8> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i8 [[VECEXT]]
+//
+uint8_t subscript_uint8(fixed_uint8m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i16 [[VECEXT]]
+//
+int16_t subscript_int16(fixed_int16m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i16 [[VECEXT]]
+//
+uint16_t subscript_uint16(fixed_uint16m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i32 [[VECEXT]]
+//
+int32_t subscript_int32(fixed_int32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i32 [[VECEXT]]
+//
+uint32_t subscript_uint32(fixed_uint32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i64 [[VECEXT]]
+//
+int64_t subscript_int64(fixed_int64m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i64 [[VECEXT]]
+//
+uint64_t subscript_uint64(fixed_uint64m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_float32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret float [[VECEXT]]
+//
+float subscript_float32(fixed_float32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_float64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x double> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret double [[VECEXT]]
+//
+double subscript_float64(fixed_float64m1_t a, size_t b) {
+ return a[b];
+}
diff --git a/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp b/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-1024
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+template <typename T> struct S {};
+
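+// Fixed-length RVV types mangle as if they were instances of a template
+// __RVV_VLS<T, VLEN>; e.g. S<fixed_int8m1_t> at VLEN=64 mangles as
+// S<__RVV_VLS<__rvv_int8m1_t, 64u>>, i.e. "9__RVV_VLSIu14__rvv_int8m1_tLj64EE".
+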
+// CHECK-64: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj64EEE
+// CHECK-128: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj128EEE
+// CHECK-256: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj256EEE
+// CHECK-512: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj512EEE
+// CHECK-1024: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj1024EEE
+void f1(S<fixed_int8m1_t>) {}
+
+// CHECK-64: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj64EEE
+// CHECK-128: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj128EEE
+// CHECK-256: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj256EEE
+// CHECK-512: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj512EEE
+// CHECK-1024: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj1024EEE
+void f2(S<fixed_int16m1_t>) {}
+
+// CHECK-64: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj64EEE
+// CHECK-128: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj128EEE
+// CHECK-256: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj256EEE
+// CHECK-512: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj512EEE
+// CHECK-1024: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj1024EEE
+void f3(S<fixed_int32m1_t>) {}
+
+// CHECK-64: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj64EEE
+// CHECK-128: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj128EEE
+// CHECK-256: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj256EEE
+// CHECK-512: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj512EEE
+// CHECK-1024: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj1024EEE
+void f4(S<fixed_int64m1_t>) {}
+
+// CHECK-64: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj64EEE
+// CHECK-128: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj128EEE
+// CHECK-256: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj256EEE
+// CHECK-512: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj512EEE
+// CHECK-1024: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj1024EEE
+void f5(S<fixed_uint8m1_t>) {}
+
+// CHECK-64: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj64EEE
+// CHECK-128: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj128EEE
+// CHECK-256: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj256EEE
+// CHECK-512: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj512EEE
+// CHECK-1024: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj1024EEE
+void f6(S<fixed_uint16m1_t>) {}
+
+// CHECK-64: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj64EEE
+// CHECK-128: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj128EEE
+// CHECK-256: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj256EEE
+// CHECK-512: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj512EEE
+// CHECK-1024: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj1024EEE
+void f7(S<fixed_uint32m1_t>) {}
+
+// CHECK-64: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj64EEE
+// CHECK-128: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj128EEE
+// CHECK-256: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj256EEE
+// CHECK-512: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj512EEE
+// CHECK-1024: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj1024EEE
+void f8(S<fixed_uint64m1_t>) {}
+
+// CHECK-64: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj64EEE
+// CHECK-128: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj128EEE
+// CHECK-256: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj256EEE
+// CHECK-512: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj512EEE
+// CHECK-1024: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj1024EEE
+void f9(S<fixed_float32m1_t>) {}
+
+// CHECK-64: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj64EEE
+// CHECK-128: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj128EEE
+// CHECK-256: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj256EEE
+// CHECK-512: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj512EEE
+// CHECK-1024: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj1024EEE
+void f10(S<fixed_float64m1_t>) {}
diff --git a/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp
@@ -0,0 +1,123 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple
riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 \ +// RUN: | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 \ +// RUN: | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 \ +// RUN: | FileCheck %s --check-prefix=CHECK-1024 + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +namespace std { +class type_info; +}; + +auto &fs8 = typeid(fixed_int8m1_t); +auto &fs16 = typeid(fixed_int16m1_t); +auto &fs32 = typeid(fixed_int32m1_t); +auto &fs64 = typeid(fixed_int64m1_t); + +auto &fu8 = typeid(fixed_uint8m1_t); +auto &fu16 = typeid(fixed_uint16m1_t); +auto &fu32 = typeid(fixed_uint32m1_t); +auto &fu64 = typeid(fixed_uint64m1_t); + +auto &ff32 = typeid(fixed_float32m1_t); +auto &ff64 = typeid(fixed_float64m1_t); + +// CHECK-64: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj256EE = {{.*}} 
@_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj1024EE = 
{{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj1024EE diff --git a/clang/test/Driver/riscv-rvv-vector-bits.c b/clang/test/Driver/riscv-rvv-vector-bits.c --- a/clang/test/Driver/riscv-rvv-vector-bits.c +++ 
b/clang/test/Driver/riscv-rvv-vector-bits.c
@@ -43,3 +43,24 @@
 // RUN:   -mrvv-vector-bits=64 2>&1 | FileCheck --check-prefix=CHECK-BAD-VALUE-ERROR %s
 
 // CHECK-BAD-VALUE-ERROR: error: unsupported argument '{{.*}}' to option '-mrvv-vector-bits='
+
+// Error if using attribute without -mrvv-vector-bits= or with -mrvv-vector-bits=scalable
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x -mrvv-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+
+typedef __rvv_int32m1_t vint32m1_t;
+typedef vint32m1_t noflag __attribute__((riscv_rvv_vector_bits(256)));
+
+// CHECK-NO-FLAG-ERROR: error: 'riscv_rvv_vector_bits' is only supported when '-mrvv-vector-bits=' is specified with a value of "zvl" or a power 2 in the range [64,65536]
+
+// Error if attribute vector size != -mrvv-vector-bits
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x -mrvv-vector-bits=128 2>&1 | FileCheck --check-prefix=CHECK-BAD-VECTOR-SIZE-ERROR %s
+
+typedef vint32m1_t bad_vector_size __attribute__((riscv_rvv_vector_bits(256)));
+
+// CHECK-BAD-VECTOR-SIZE-ERROR: error: invalid RVV vector size '256', must match value set by '-mrvv-vector-bits' ('128')
diff --git a/clang/test/Sema/attr-riscv-rvv-vector-bits.c b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
--- a/clang/test/Sema/attr-riscv-rvv-vector-bits.c
+++ b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
@@ -1,10 +1,12 @@
-// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify %s
-
-// TODO: Support for a arm_sve_vector_bits like attribute will come in the future.
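+// The RUN lines below cover each supported power-of-two VLEN: with zve64d,
+// -mvscale-min/max=K pins vscale to K, giving VLEN = 64 * K (64 through 1024).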
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=1 -mvscale-max=1 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=2 -mvscale-max=2 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=4 -mvscale-max=4 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=8 -mvscale-max=8 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=16 -mvscale-max=16 %s
 
 #include <stdint.h>
 
-#define N 64
+#define N __RISCV_RVV_VLEN_BITS
 
 typedef __rvv_int8m1_t vint8m1_t;
 typedef __rvv_uint8m1_t vuint8m1_t;
@@ -17,64 +19,299 @@
 typedef __rvv_float32m1_t vfloat32m1_t;
 typedef __rvv_float64m1_t vfloat64m1_t;
 
+// Define valid fixed-width RVV types
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
 // GNU vector types
-typedef int8_t gnu_int8_t __attribute__((vector_size(N / 8)));
-typedef int16_t gnu_int16_t __attribute__((vector_size(N / 8)));
-typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8)));
-typedef int64_t gnu_int64_t __attribute__((vector_size(N / 8)));
+typedef int8_t gnu_int8m1_t __attribute__((vector_size(N / 8)));
+typedef int16_t gnu_int16m1_t __attribute__((vector_size(N / 8)));
+typedef int32_t gnu_int32m1_t __attribute__((vector_size(N / 8)));
+typedef int64_t gnu_int64m1_t __attribute__((vector_size(N / 8)));
+
+typedef uint8_t gnu_uint8m1_t __attribute__((vector_size(N / 8)));
+typedef uint16_t gnu_uint16m1_t __attribute__((vector_size(N / 8)));
+typedef uint32_t gnu_uint32m1_t __attribute__((vector_size(N / 8)));
+typedef uint64_t gnu_uint64m1_t __attribute__((vector_size(N / 8)));
+
+typedef float gnu_float32m1_t __attribute__((vector_size(N / 8)));
+typedef double gnu_float64m1_t __attribute__((vector_size(N / 8)));
+
+// Attribute must have a single argument
+typedef vint8m1_t no_argument __attribute__((riscv_rvv_vector_bits)); // expected-error {{'riscv_rvv_vector_bits' attribute takes one argument}}
+typedef vint8m1_t two_arguments __attribute__((riscv_rvv_vector_bits(2, 4))); // expected-error {{'riscv_rvv_vector_bits' attribute takes one argument}}
+
+// The number of RVV vector bits must be an integer constant expression
+typedef vint8m1_t non_int_size1 __attribute__((riscv_rvv_vector_bits(2.0))); // expected-error {{'riscv_rvv_vector_bits' attribute requires an integer constant}}
+typedef
+typedef vint8m1_t non_int_size2 __attribute__((riscv_rvv_vector_bits("256"))); // expected-error {{'riscv_rvv_vector_bits' attribute requires an integer constant}}
+
+// Attribute must be attached to a single RVV vector or mask type.
+typedef void *badtype1 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'void *'}}
+typedef int badtype2 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'int'}}
+typedef float badtype3 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'float'}}
+
+// Attribute only applies to typedefs.
+vint8m1_t non_typedef_type __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute only applies to typedefs}}
+
+// Test that we can define non-local fixed-length RVV types (unsupported for
+// sizeless types).
+fixed_int8m1_t global_int8;
+
+extern fixed_int8m1_t extern_int8;
+
+static fixed_int8m1_t static_int8;
+
+fixed_int8m1_t *global_int8_ptr;
+extern fixed_int8m1_t *extern_int8_ptr;
+static fixed_int8m1_t *static_int8_ptr;
+__thread fixed_int8m1_t thread_int8;
+
+typedef fixed_int8m1_t int8_typedef;
+typedef fixed_int8m1_t *int8_ptr_typedef;

-typedef uint8_t gnu_uint8_t __attribute__((vector_size(N / 8)));
-typedef uint16_t gnu_uint16_t __attribute__((vector_size(N / 8)));
-typedef uint32_t gnu_uint32_t __attribute__((vector_size(N / 8)));
-typedef uint64_t gnu_uint64_t __attribute__((vector_size(N / 8)));
+// Test sized expressions
+int sizeof_int8 = sizeof(global_int8);
+int sizeof_int8_var = sizeof(*global_int8_ptr);
+int sizeof_int8_var_ptr = sizeof(global_int8_ptr);

-typedef float gnu_float32_t __attribute__((vector_size(N / 8)));
-typedef double gnu_float64_t __attribute__((vector_size(N / 8)));
+extern fixed_int8m1_t *extern_int8_ptr;
+int alignof_int8 = __alignof__(extern_int8);
+int alignof_int8_var = __alignof__(*extern_int8_ptr);
+int alignof_int8_var_ptr = __alignof__(extern_int8_ptr);

 void f(int c) {
+  fixed_int8m1_t fs8;
   vint8m1_t ss8;
-  gnu_int8_t gs8;
+  gnu_int8m1_t gs8;

   // Check conditional expressions where the result is ambiguous are
   // ill-formed.
   void *sel __attribute__((unused));

+  sel = c ? ss8 : fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  sel = c ? fs8 : ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   sel = c ? gs8 : ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
   sel = c ? ss8 : gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  sel = c ? gs8 : fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  sel = c ? fs8 : gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+
+  // Check binary expressions where the result is ambiguous are ill-formed.
+  ss8 = ss8 + fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 = ss8 + gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 + ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 + gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 + ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 + fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

+  ss8 += fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 += gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 += ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 += gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 += ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 += fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

+  ss8 = ss8 == fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 = ss8 == gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 == ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 == gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 == ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 == fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

-  ss8 = ss8 & gs8; // expected-error {{invalid operands to binary expression ('vint8m1_t' (aka '__rvv_int8m1_t') and 'gnu_int8_t' (vector of 8 'int8_t' values))}}
+  ss8 = ss8 & fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  ss8 = ss8 & gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

-  gs8 = gs8 & ss8; // expected-error {{invalid operands to binary expression ('gnu_int8_t' (vector of 8 'int8_t' values) and 'vint8m1_t' (aka '__rvv_int8m1_t'))}}
+  fs8 = fs8 & ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 & gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+
+  gs8 = gs8 & ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 & fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
 }
+
+// --------------------------------------------------------------------------//
+// Sizeof
+
+#define VECTOR_SIZE ((N / 8))
+
+_Static_assert(sizeof(fixed_int8m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int16m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int64m1_t) == VECTOR_SIZE, "");
+
+_Static_assert(sizeof(fixed_uint8m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint16m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint64m1_t) == VECTOR_SIZE, "");
+
+_Static_assert(sizeof(fixed_float32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_float64m1_t) == VECTOR_SIZE, "");
+
+// --------------------------------------------------------------------------//
+// Alignof
+
+#define VECTOR_ALIGN 8
+
+_Static_assert(__alignof__(fixed_int8m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int64m1_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint64m1_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_float64m1_t) == VECTOR_ALIGN, "");
+
+// --------------------------------------------------------------------------//
+// Structs
+
+struct struct_int64 { fixed_int64m1_t x, y[5]; };
+struct struct_float64 { fixed_float64m1_t x, y[5]; };
+
+// --------------------------------------------------------------------------//
+// Unions
+union union_int64 { fixed_int64m1_t x, y[5]; };
+union union_float64 { fixed_float64m1_t x, y[5]; };
+
 // --------------------------------------------------------------------------//
 // Implicit casts

-gnu_int8_t to_gnu_int8_t_from_vint8m1_t_(vint8m1_t x) { return x; } // expected-error {{returning 'vint8m1_t' (aka '__rvv_int8m1_t') from a function with incompatible result type 'gnu_int8_t' (vector of 8 'int8_t' values)}}
-vint8m1_t from_gnu_int8_t_to_vint8m1_t(gnu_int8_t x) { return x; } // expected-error {{returning 'gnu_int8_t' (vector of 8 'int8_t' values) from a function with incompatible result type 'vint8m1_t' (aka '__rvv_int8m1_t')}}
+#define TEST_CAST_COMMON(TYPE) \
+  v##TYPE##_t to_v##TYPE##_t_from_fixed(fixed_##TYPE##_t x) { return x; } \
+  fixed_##TYPE##_t from_##TYPE##_t_to_fixed(v##TYPE##_t x) { return x; }
+
+#define TEST_CAST_GNU(PREFIX, TYPE) \
+  gnu_##TYPE##_t to_gnu_##TYPE##_t_from_##PREFIX##TYPE##_t(PREFIX##TYPE##_t x) { return x; } \
+  PREFIX##TYPE##_t from_gnu_##TYPE##_t_to_##PREFIX##TYPE##_t(gnu_##TYPE##_t x) { return x; }
+
+#define TEST_CAST_VECTOR(TYPE) \
+  TEST_CAST_COMMON(TYPE) \
+  TEST_CAST_GNU(v, TYPE) \
+  TEST_CAST_GNU(fixed_, TYPE)
+
+TEST_CAST_VECTOR(int8m1)
+TEST_CAST_VECTOR(int16m1)
+TEST_CAST_VECTOR(int32m1)
+TEST_CAST_VECTOR(int64m1)
+TEST_CAST_VECTOR(uint8m1)
+TEST_CAST_VECTOR(uint16m1)
+TEST_CAST_VECTOR(uint32m1)
+TEST_CAST_VECTOR(uint64m1)
+TEST_CAST_VECTOR(float32m1)
+TEST_CAST_VECTOR(float64m1)

 // --------------------------------------------------------------------------//
-// Test passing GNU vector scalable function
+// Test that the scalable and fixed-length types can be used interchangeably

 vint32m1_t __attribute__((overloadable)) vfunc(vint32m1_t op1, vint32m1_t op2);
 vfloat64m1_t __attribute__((overloadable)) vfunc(vfloat64m1_t op1, vfloat64m1_t op2);

-gnu_int32_t call_int32_ff(gnu_int32_t op1, gnu_int32_t op2) {
-  return vfunc(op1, op2); // expected-error {{no matching function for call to 'vfunc'}}
-  // expected-note@-5 {{candidate function not viable: no known conversion from 'gnu_int32_t' (vector of 2 'int32_t' values) to 'vint32m1_t' (aka '__rvv_int32m1_t') for 1st argument}}
-  // expected-note@-5 {{candidate function not viable: no known conversion from 'gnu_int32_t' (vector of 2 'int32_t' values) to 'vfloat64m1_t' (aka '__rvv_float64m1_t') for 1st argument}}
-}
+#define TEST_CALL(TYPE) \
+  fixed_##TYPE##_t \
+  call_##TYPE##_ff(fixed_##TYPE##_t op1, fixed_##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  } \
+  fixed_##TYPE##_t \
+  call_##TYPE##_fs(fixed_##TYPE##_t op1, v##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  } \
+  fixed_##TYPE##_t \
+  call_##TYPE##_sf(v##TYPE##_t op1, fixed_##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  }
+
+TEST_CALL(int32m1)
+TEST_CALL(float64m1)
+
+// --------------------------------------------------------------------------//
+// Vector initialization
+
+#if __RISCV_RVV_VLEN_BITS == 256
+
+typedef vint32m1_t int32x8 __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t float64x4 __attribute__((riscv_rvv_vector_bits(N)));
+
+int32x8 foo = {1, 2, 3, 4, 5, 6, 7, 8};
+int32x8 foo2 = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // expected-warning{{excess elements in vector initializer}}
+
+float64x4 bar = {1.0, 2.0, 3.0, 4.0};
+float64x4 bar2 = {1.0, 2.0, 3.0, 4.0, 5.0}; // expected-warning{{excess elements in vector initializer}}
+
+#endif
+
+// --------------------------------------------------------------------------//
+// Vector ops
+
+#define TEST_BINARY(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    return op1 OP op2; \
+  } \
+  TYPE compound##NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    op1 OP##= op2; \
+    return op1; \
+  }
+
+#define TEST_COMPARISON(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    return op1 OP op2; \
+  }
+
+#define TEST_UNARY(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1) { \
+    return OP op1; \
+  }
+
+#define TEST_OPS(TYPE) \
+  TEST_BINARY(TYPE, add, +) \
+  TEST_BINARY(TYPE, sub, -) \
+  TEST_BINARY(TYPE, mul, *) \
+  TEST_BINARY(TYPE, div, /) \
+  TEST_COMPARISON(TYPE, eq, ==) \
+  TEST_COMPARISON(TYPE, ne, !=) \
+  TEST_COMPARISON(TYPE, lt, <) \
+  TEST_COMPARISON(TYPE, gt, >) \
+  TEST_COMPARISON(TYPE, lte, <=) \
+  TEST_COMPARISON(TYPE, gte, >=) \
+  TEST_UNARY(TYPE, nop, +) \
+  TEST_UNARY(TYPE, neg, -)
+
+#define TEST_INT_OPS(TYPE) \
+  TEST_OPS(TYPE) \
+  TEST_BINARY(TYPE, mod, %) \
+  TEST_BINARY(TYPE, and, &) \
+  TEST_BINARY(TYPE, or, |) \
+  TEST_BINARY(TYPE, xor, ^) \
+  TEST_BINARY(TYPE, shl, <<) \
+  TEST_BINARY(TYPE, shr, >>) \
+  TEST_UNARY(TYPE, not, ~)
+
+TEST_INT_OPS(fixed_int8m1_t)
+TEST_INT_OPS(fixed_int16m1_t)
+TEST_INT_OPS(fixed_int32m1_t)
+TEST_INT_OPS(fixed_int64m1_t)
+TEST_INT_OPS(fixed_uint8m1_t)
+TEST_INT_OPS(fixed_uint16m1_t)
+TEST_INT_OPS(fixed_uint32m1_t)
+TEST_INT_OPS(fixed_uint64m1_t)
+
+TEST_OPS(fixed_float32m1_t)
+TEST_OPS(fixed_float64m1_t)
diff --git a/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c b/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+
+// expected-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t fixed_float32m1_t FIXED_ATTR;
+typedef vfloat64m1_t fixed_float64m1_t FIXED_ATTR;
+typedef vint32m1_t fixed_int32m1_t FIXED_ATTR;
+typedef vint64m1_t fixed_int64m1_t FIXED_ATTR;
+
+// RVV VLS types can be cast to RVV VLA types, regardless of lane size.
+// NOTE: the list below is NOT exhaustive for all RVV types.
+
+#define CAST(from, to) \
+  void from##_to_##to(from a, to b) { \
+    b = (to) a; \
+  }
+
+#define TESTCASE(ty1, ty2) \
+  CAST(ty1, ty2) \
+  CAST(ty2, ty1)
+
+TESTCASE(fixed_float32m1_t, vfloat32m1_t)
+TESTCASE(fixed_float32m1_t, vfloat64m1_t)
+TESTCASE(fixed_float32m1_t, vint32m1_t)
+TESTCASE(fixed_float32m1_t, vint64m1_t)
+
+TESTCASE(fixed_float64m1_t, vfloat32m1_t)
+TESTCASE(fixed_float64m1_t, vfloat64m1_t)
+TESTCASE(fixed_float64m1_t, vint32m1_t)
+TESTCASE(fixed_float64m1_t, vint64m1_t)
+
+TESTCASE(fixed_int32m1_t, vfloat32m1_t)
+TESTCASE(fixed_int32m1_t, vfloat64m1_t)
+TESTCASE(fixed_int32m1_t, vint32m1_t)
+TESTCASE(fixed_int32m1_t, vint64m1_t)
+
+TESTCASE(fixed_int64m1_t, vfloat32m1_t)
+TESTCASE(fixed_int64m1_t, vfloat64m1_t)
+TESTCASE(fixed_int64m1_t, vint32m1_t)
+TESTCASE(fixed_int64m1_t, vint64m1_t)
diff --git a/clang/test/Sema/riscv-rvv-lax-vector-conversions.c b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+
+// lax-vector-all-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR;
+typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR;
+typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32m1_t GNU_FIXED_ATTR;
+
+void rvv_allowed_with_integer_lax_conversions() {
+  rvv_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void rvv_allowed_with_all_lax_conversions() {
+  rvv_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_integer_lax_conversions() {
+  gnu_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_all_lax_conversions() {
+  gnu_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
diff --git a/clang/test/Sema/riscv-vector-types-support.c b/clang/test/Sema/riscv-vector-types-support.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-vector-types-support.c
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 %s -triple riscv64 -fsyntax-only -verify
+
+typedef __attribute__((riscv_rvv_vector_bits(256))) void norvvflag; // expected-error{{'riscv_rvv_vector_bits' attribute is not supported on targets missing 'zve32x'; specify an appropriate -march= or -mcpu=}}
diff --git a/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp b/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64x -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s
+// expected-no-diagnostics
+
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef int8_t gnu_int8m1_t __attribute__((vector_size(N / 8)));
+
+template <typename T> struct S { T var; };
+
+S<fixed_int8m1_t> s;
+
+// Test implicit casts between VLA and VLS vectors
+vint8m1_t to_vint8m1_t(fixed_int8m1_t x) { return x; }
+fixed_int8m1_t from_vint8m1_t(vint8m1_t x) { return x; }
+
+// Test implicit casts between GNU and VLA vectors
+vint8m1_t to_vint8m1_t__from_gnu_int8m1_t(gnu_int8m1_t x) { return x; }
+gnu_int8m1_t from_vint8m1_t__to_gnu_int8m1_t(vint8m1_t x) { return x; }
+
+// Test implicit casts between GNU and VLS vectors
+fixed_int8m1_t to_fixed_int8m1_t__from_gnu_int8m1_t(gnu_int8m1_t x) { return x; }
+gnu_int8m1_t from_fixed_int8m1_t__to_gnu_int8m1_t(fixed_int8m1_t x) { return x; }
diff --git a/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp b/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+
+// REQUIRES: riscv-registered-target
+
+// expected-no-diagnostics
+
+#define N __RISCV_RVV_VLEN_BITS
+#define FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t fixed_float32m1_t FIXED_ATTR;
+typedef vfloat64m1_t fixed_float64m1_t FIXED_ATTR;
+typedef vint32m1_t fixed_int32m1_t FIXED_ATTR;
+typedef vint64m1_t fixed_int64m1_t FIXED_ATTR;
+
+// RVV VLS types can be cast to RVV VLA types, regardless of lane size.
+// NOTE: the list below is NOT exhaustive for all RVV types.
+
+#define CAST(from, to) \
+  void from##_to_##to(from a, to b) { \
+    b = (to) a; \
+  }
+
+#define TESTCASE(ty1, ty2) \
+  CAST(ty1, ty2) \
+  CAST(ty2, ty1)
+
+TESTCASE(fixed_float32m1_t, vfloat32m1_t)
+TESTCASE(fixed_float32m1_t, vfloat64m1_t)
+TESTCASE(fixed_float32m1_t, vint32m1_t)
+TESTCASE(fixed_float32m1_t, vint64m1_t)
+
+TESTCASE(fixed_float64m1_t, vfloat32m1_t)
+TESTCASE(fixed_float64m1_t, vfloat64m1_t)
+TESTCASE(fixed_float64m1_t, vint32m1_t)
+TESTCASE(fixed_float64m1_t, vint64m1_t)
+
+TESTCASE(fixed_int32m1_t, vfloat32m1_t)
+TESTCASE(fixed_int32m1_t, vfloat64m1_t)
+TESTCASE(fixed_int32m1_t, vint32m1_t)
+TESTCASE(fixed_int32m1_t, vint64m1_t)
+
+TESTCASE(fixed_int64m1_t, vfloat32m1_t)
+TESTCASE(fixed_int64m1_t, vfloat64m1_t)
+TESTCASE(fixed_int64m1_t, vint32m1_t)
+TESTCASE(fixed_int64m1_t, vint64m1_t)
diff --git a/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp b/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+
+// lax-vector-all-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR;
+typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR;
+typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32m1_t GNU_FIXED_ATTR;
+
+void rvv_allowed_with_integer_lax_conversions() {
+  rvv_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void rvv_allowed_with_all_lax_conversions() {
+  rvv_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_integer_lax_conversions() {
+  gnu_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_all_lax_conversions() {
+  gnu_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
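+
+// Illustrative extra check (a sketch mirroring the riscv-rvv-explicit-casts
+// tests above, with a hypothetical helper name): explicit casts between
+// fixed-length and sizeless RVV types stay well-formed in every
+// -flax-vector-conversions= mode.
+vint64m1_t explicit_cast_always_ok(rvv_fixed_int32m1_t x) {
+  return (vint64m1_t)x;
+}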