diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -389,6 +389,9 @@
 - Fixed incorrect ABI lowering of ``_Float16`` in the case of structs
   containing ``_Float16`` that are eligible for passing via GPR+FPR or
   FPR+FPR.
+- Added ``attribute(riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS))`` to allow
+  the size of an RVV (RISC-V Vector) scalable type to be specified. This allows
+  RVV scalable vector types to be used in structs or in global variables.
 
 CUDA/HIP Language Changes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -2253,6 +2253,15 @@
   /// false otherwise.
   bool areLaxCompatibleSveTypes(QualType FirstType, QualType SecondType);
 
+  /// Return true if the given types are a RISC-V vector builtin type and a
+  /// VectorType that is a fixed-length representation of the RISC-V vector
+  /// builtin type for a specific vector length.
+  bool areCompatibleRVVTypes(QualType FirstType, QualType SecondType);
+
+  /// Return true if the given vector types are lax-compatible RISC-V vector
+  /// types as defined by -flax-vector-conversions=, false otherwise.
+  bool areLaxCompatibleRVVTypes(QualType FirstType, QualType SecondType);
+
   /// Return true if the type has been explicitly qualified with ObjC ownership.
   /// A type may be implicitly qualified with ownership under ObjC ARC, and in
   /// some cases the compiler treats these differently.
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -1767,7 +1767,7 @@
 
     /// The kind of vector, either a generic vector type or some
     /// target-specific vector type such as for AltiVec or Neon.
-    unsigned VecKind : 3;
+    unsigned VecKind : 4;
 
     /// The number of elements in the vector.
     uint32_t NumElements;
   };
@@ -2046,6 +2046,16 @@
   /// 'arm_sve_vector_bits' type attribute as VectorType.
   QualType getSveEltType(const ASTContext &Ctx) const;
 
+  /// Determines if this is a sizeless type supported by the
+  /// 'riscv_rvv_vector_bits' type attribute, which can be applied to a single
+  /// RVV vector or mask.
+  bool isRVVVLSBuiltinType() const;
+
+  /// Returns the representative type for the element of an RVV builtin type.
+  /// This is used to represent fixed-length RVV vectors created with the
+  /// 'riscv_rvv_vector_bits' type attribute as VectorType.
+  QualType getRVVEltType(const ASTContext &Ctx) const;
+
   /// Types are partitioned into 3 broad categories (C99 6.2.5p1):
   /// object types, function types, and incomplete types.
 
@@ -3399,7 +3409,10 @@
     SveFixedLengthDataVector,
 
     /// is AArch64 SVE fixed-length predicate vector
-    SveFixedLengthPredicateVector
+    SveFixedLengthPredicateVector,
+
+    /// is RISC-V RVV fixed-length data vector
+    RVVFixedLengthDataVector,
   };
 
 protected:
diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1871,6 +1871,16 @@
   let Documentation = [RISCVInterruptDocs];
 }
 
+def RISCVRVVVectorBits : TypeAttr {
+  let Spellings = [GNU<"riscv_rvv_vector_bits">];
+  let Subjects = SubjectList<[TypedefName], ErrorDiag>;
+  let Args = [UnsignedArgument<"NumBits">];
+  let Documentation = [RISCVRVVVectorBitsDocs];
+  let PragmaAttributeSupport = 0;
+  // Represented as VectorType instead.
+  let ASTNode = 0;
+}
+
 // This is not a TargetSpecificAttr so that it is silently accepted and
 // ignored on other targets as encouraged by the OpenCL spec.
 //
diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -2317,6 +2317,37 @@
   }];
 }
 
+def RISCVRVVVectorBitsDocs : Documentation {
+  let Category = DocCatType;
+  let Content = [{
+The ``riscv_rvv_vector_bits(N)`` attribute is used to define fixed-length
+variants of sizeless types.
+
+For example:
+
+.. code-block:: c
+
+  #include <riscv_vector.h>
+
+  #if __RISCV_RVV_VLEN_BITS == 512
+  typedef vint8m1_t fixed_vint8m1_t __attribute__((riscv_rvv_vector_bits(512)));
+  #endif
+
+This creates a type ``fixed_vint8m1_t`` that is a fixed-length variant of
+``vint8m1_t`` that contains exactly 512 bits. Unlike ``vint8m1_t``, this type
+can be used in globals, structs, unions, and arrays, all of which are
+unsupported for sizeless types.
+
+The attribute can be attached to a single RVV vector (such as ``vint8m1_t``).
+The behavior of the attribute is undefined unless
+``N == __RISCV_RVV_VLEN_BITS``, the implementation-defined feature macro that
+is enabled under the ``-mrvv-vector-bits`` flag. ``__RISCV_RVV_VLEN_BITS`` can
+only be a power of 2 between 64 and 65536.
+
+Only ``*m1_t`` (LMUL=1) types are supported at this time.
+}];
+}
+
 def AVRInterruptDocs : Documentation {
   let Category = DocCatFunction;
   let Heading = "interrupt (AVR)";
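For illustration, a sketch of the user-facing semantics documented above (hypothetical example, not taken from the patch; assumes the translation unit is built so that __RISCV_RVV_VLEN_BITS expands to 128):

    #include <riscv_vector.h>

    // Fixed-length alias of the sizeless vint32m1_t. N must equal
    // __RISCV_RVV_VLEN_BITS, otherwise the behavior is undefined.
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));

    // All of the following are ill-formed for the sizeless type but are
    // accepted for the fixed-length variant:
    fixed_vint32m1_t global_vec;             // global variable
    struct S { fixed_vint32m1_t member; };   // struct member
    fixed_vint32m1_t arr[4];                 // array element
    _Static_assert(sizeof(fixed_vint32m1_t) == 16, "sizeof is well-defined");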
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3057,6 +3057,14 @@
   "value of 128, 256, 512, 1024 or 2048.">;
 def err_sve_vector_in_non_sve_target : Error<
   "SVE vector type %0 cannot be used in a target without sve">;
+def err_attribute_riscv_rvv_bits_unsupported : Error<
+  "%0 is only supported when '-mrvv-vector-bits=' is specified with a "
+  "value of \"zvl\" or a power of 2 in the range [64,65536]">;
+def err_attribute_bad_rvv_vector_size : Error<
+  "invalid RVV vector size '%0', must match value set by "
+  "'-mrvv-vector-bits' ('%1')">;
+def err_attribute_invalid_rvv_type : Error<
+  "%0 attribute applied to non-RVV type %1">;
 def err_attribute_requires_positive_integer : Error<
   "%0 attribute requires a %select{positive|non-negative}1 "
   "integral compile time constant expression">;
@@ -3167,8 +3175,9 @@
   "vector size not an integral multiple of component size">;
 def err_attribute_zero_size : Error<"zero %0 size">;
 def err_attribute_size_too_large : Error<"%0 size too large">;
-def err_typecheck_sve_ambiguous : Error<
-  "cannot combine fixed-length and sizeless SVE vectors in expression, result is ambiguous (%0 and %1)">;
+def err_typecheck_sve_rvv_ambiguous : Error<
+  "cannot combine fixed-length and sizeless %select{SVE|RVV}0 vectors "
+  "in expression, result is ambiguous (%1 and %2)">;
 def err_typecheck_sve_rvv_gnu_ambiguous : Error<
   "cannot combine GNU and %select{SVE|RVV}0 vectors in expression, result is ambiguous (%1 and %2)">;
 def err_typecheck_vector_not_convertable_implict_truncation : Error<
diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -40,6 +40,10 @@
 //
 //===----------------------------------------------------------------------===//
 
+#ifndef RVV_TYPE
+#define RVV_TYPE(Name, Id, SingletonId)
+#endif
+
 #ifndef RVV_VECTOR_TYPE
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP)\
   RVV_TYPE(Name, Id, SingletonId)
diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h
--- a/clang/include/clang/Sema/Overload.h
+++ b/clang/include/clang/Sema/Overload.h
@@ -162,6 +162,9 @@
     /// Arm SVE Vector conversions
     ICK_SVE_Vector_Conversion,
 
+    /// RISC-V RVV Vector conversions
+    ICK_RVV_Vector_Conversion,
+
     /// A vector splat from an arithmetic type
     ICK_Vector_Splat,
 
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -12685,6 +12685,7 @@
                               SourceLocation Loc, bool IsCompAssign);
 
   bool isValidSveBitcast(QualType srcType, QualType destType);
+  bool isValidRVVBitcast(QualType srcType, QualType destType);
 
   bool areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy);
 
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -85,6 +85,7 @@
 #include "llvm/Support/MD5.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 #include <cassert>
@@ -2010,6 +2011,9 @@
     else if (VT->getVectorKind() == VectorType::SveFixedLengthPredicateVector)
       // Adjust the alignment for fixed-length SVE predicates.
      Align = 16;
+    else if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+      // Adjust the alignment for fixed-length RVV vectors.
+      Align = 64;
     break;
   }
 
@@ -9454,7 +9458,9 @@
       First->getVectorKind() != VectorType::SveFixedLengthDataVector &&
       First->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
       Second->getVectorKind() != VectorType::SveFixedLengthDataVector &&
-      Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector)
+      Second->getVectorKind() != VectorType::SveFixedLengthPredicateVector &&
+      First->getVectorKind() != VectorType::RVVFixedLengthDataVector &&
+      Second->getVectorKind() != VectorType::RVVFixedLengthDataVector)
     return true;
 
   return false;
@@ -9552,6 +9558,85 @@
          IsLaxCompatible(SecondType, FirstType);
 }
 
+/// getRVVTypeSize - Return RVV vector register size.
+static uint64_t getRVVTypeSize(ASTContext &Context, const BuiltinType *Ty) {
+  assert(Ty->isRVVVLSBuiltinType() && "Invalid RVV Type");
+  auto VScale = Context.getTargetInfo().getVScaleRange(Context.getLangOpts());
+  return VScale ? VScale->first * llvm::RISCV::RVVBitsPerBlock : 0;
+}
+
+bool ASTContext::areCompatibleRVVTypes(QualType FirstType,
+                                       QualType SecondType) {
+  assert(
+      ((FirstType->isRVVSizelessBuiltinType() && SecondType->isVectorType()) ||
+       (FirstType->isVectorType() && SecondType->isRVVSizelessBuiltinType())) &&
+      "Expected RVV builtin type and vector type!");
+
+  auto IsValidCast = [this](QualType FirstType, QualType SecondType) {
+    if (const auto *BT = FirstType->getAs<BuiltinType>()) {
+      if (const auto *VT = SecondType->getAs<VectorType>()) {
+        // Predicates have the same representation as uint8 so we also have to
+        // check the kind to make these types incompatible.
+        if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+          return FirstType->isRVVVLSBuiltinType() &&
+                 VT->getElementType().getCanonicalType() ==
+                     FirstType->getRVVEltType(*this);
+        if (VT->getVectorKind() == VectorType::GenericVector)
+          return getTypeSize(SecondType) == getRVVTypeSize(*this, BT) &&
+                 hasSameType(VT->getElementType(),
+                             getBuiltinVectorTypeInfo(BT).ElementType);
+      }
+    }
+    return false;
+  };
+
+  return IsValidCast(FirstType, SecondType) ||
+         IsValidCast(SecondType, FirstType);
+}
+
+bool ASTContext::areLaxCompatibleRVVTypes(QualType FirstType,
+                                          QualType SecondType) {
+  assert(
+      ((FirstType->isRVVSizelessBuiltinType() && SecondType->isVectorType()) ||
+       (FirstType->isVectorType() && SecondType->isRVVSizelessBuiltinType())) &&
+      "Expected RVV builtin type and vector type!");
+
+  auto IsLaxCompatible = [this](QualType FirstType, QualType SecondType) {
+    const auto *BT = FirstType->getAs<BuiltinType>();
+    if (!BT)
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    if (VecTy &&
+        (VecTy->getVectorKind() == VectorType::RVVFixedLengthDataVector ||
+         VecTy->getVectorKind() == VectorType::GenericVector)) {
+      const LangOptions::LaxVectorConversionKind LVCKind =
+          getLangOpts().getLaxVectorConversions();
+
+      // If __RISCV_RVV_VLEN_BITS != N do not allow GNU vector lax conversion.
+      if (VecTy->getVectorKind() == VectorType::GenericVector &&
+          getTypeSize(SecondType) != getRVVTypeSize(*this, BT))
+        return false;
+
+      // If -flax-vector-conversions=all is specified, the types are
+      // certainly compatible.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::All)
+        return true;
+
+      // If -flax-vector-conversions=integer is specified, the types are
+      // compatible if the elements are integer types.
+      if (LVCKind == LangOptions::LaxVectorConversionKind::Integer)
+        return VecTy->getElementType().getCanonicalType()->isIntegerType() &&
+               FirstType->getRVVEltType(*this)->isIntegerType();
+    }
+
+    return false;
+  };
+
+  return IsLaxCompatible(FirstType, SecondType) ||
+         IsLaxCompatible(SecondType, FirstType);
+}
+
 bool ASTContext::hasDirectOwnershipQualifier(QualType Ty) const {
   while (true) {
     // __strong id
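A sketch of the conversions these two predicates govern (hypothetical user code, not from the patch; assumes __RISCV_RVV_VLEN_BITS is defined and a GNU vector of matching total size):

    #include <riscv_vector.h>
    #include <stdint.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));
    typedef int32_t gnu_vint32m1_t
        __attribute__((vector_size(__RISCV_RVV_VLEN_BITS / 8)));

    void convert(vint32m1_t scalable, fixed_vint32m1_t fixed,
                 gnu_vint32m1_t gnu) {
      fixed = scalable;  // areCompatibleRVVTypes: exact match, always allowed
      scalable = fixed;  // likewise in the other direction
      scalable = gnu;    // allowed here because the GNU vector's total size and
                         // element type match; with a different element type
                         // this would instead go through areLaxCompatibleRVVTypes
                         // and depend on -flax-vector-conversions=
    }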
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -563,6 +563,8 @@
   void mangleAArch64NeonVectorType(const DependentVectorType *T);
   void mangleAArch64FixedSveVectorType(const VectorType *T);
   void mangleAArch64FixedSveVectorType(const DependentVectorType *T);
+  void mangleRISCVFixedRVVVectorType(const VectorType *T);
+  void mangleRISCVFixedRVVVectorType(const DependentVectorType *T);
 
   void mangleIntegerLiteral(QualType T, const llvm::APSInt &Value);
   void mangleFloatLiteral(QualType T, const llvm::APFloat &V);
@@ -3812,6 +3814,68 @@
   Diags.Report(T->getAttributeLoc(), DiagID);
 }
 
+void CXXNameMangler::mangleRISCVFixedRVVVectorType(const VectorType *T) {
+  assert(T->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
+         "expected fixed-length RVV vector!");
+
+  QualType EltType = T->getElementType();
+  assert(EltType->isBuiltinType() &&
+         "expected builtin type for fixed-length RVV vector!");
+
+  StringRef TypeName;
+  switch (cast<BuiltinType>(EltType)->getKind()) {
+  case BuiltinType::SChar:
+    TypeName = "__rvv_int8m1_t";
+    break;
+  case BuiltinType::UChar:
+    TypeName = "__rvv_uint8m1_t";
+    break;
+  case BuiltinType::Short:
+    TypeName = "__rvv_int16m1_t";
+    break;
+  case BuiltinType::UShort:
+    TypeName = "__rvv_uint16m1_t";
+    break;
+  case BuiltinType::Int:
+    TypeName = "__rvv_int32m1_t";
+    break;
+  case BuiltinType::UInt:
+    TypeName = "__rvv_uint32m1_t";
+    break;
+  case BuiltinType::Long:
+    TypeName = "__rvv_int64m1_t";
+    break;
+  case BuiltinType::ULong:
+    TypeName = "__rvv_uint64m1_t";
+    break;
+  case BuiltinType::Half:
+    TypeName = "__rvv_float16m1_t";
+    break;
+  case BuiltinType::Float:
+    TypeName = "__rvv_float32m1_t";
+    break;
+  case BuiltinType::Double:
+    TypeName = "__rvv_float64m1_t";
+    break;
+  default:
+    llvm_unreachable("unexpected element type for fixed-length RVV vector!");
+  }
+
+  unsigned VecSizeInBits = getASTContext().getTypeInfo(T).Width;
+
+  Out << "9__RVV_VLSI" << 'u' << TypeName.size() << TypeName << "Lj"
+      << VecSizeInBits << "EE";
+}
+
+void CXXNameMangler::mangleRISCVFixedRVVVectorType(
+    const DependentVectorType *T) {
+  DiagnosticsEngine &Diags = Context.getDiags();
+  unsigned DiagID = Diags.getCustomDiagID(
+      DiagnosticsEngine::Error,
+      "cannot mangle this dependent fixed-length RVV vector type yet");
+  Diags.Report(T->getAttributeLoc(), DiagID);
+}
+
 // GNU extension: vector types
 // <type>        ::= <vector-type>
 // <vector-type> ::= Dv <positive dimension number> _
@@ -3836,6 +3900,9 @@
       T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
     mangleAArch64FixedSveVectorType(T);
     return;
+  } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+    mangleRISCVFixedRVVVectorType(T);
+    return;
   }
   Out << "Dv" << T->getNumElements() << '_';
   if (T->getVectorKind() == VectorType::AltiVecPixel)
@@ -3862,6 +3929,9 @@
       T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) {
     mangleAArch64FixedSveVectorType(T);
     return;
+  } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+    mangleRISCVFixedRVVVectorType(T);
+    return;
   }
   Out << "Dv";
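For reference, a sketch of what this mangling produces: the fixed-length type is wrapped in the template-like __RVV_VLS name together with its bit width (hypothetical C++ example; assumes a 128-bit configuration, and the demangled spelling may vary by demangler):

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));
    void f(fixed_vint32m1_t) {}
    // mangles to: _Z1f9__RVV_VLSIu15__rvv_int32m1_tLj128EE
    // i.e. f(__RVV_VLS<__rvv_int32m1_t, 128u>)

Here "u15__rvv_int32m1_t" is the Itanium vendor-extended builtin type encoding (u, name length, name), matching the Out << 'u' << TypeName.size() << TypeName sequence above.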
"__rvv_int32m1_t"; + break; + case BuiltinType::UInt: + TypeName = "__rvv_uint32m1_t"; + break; + case BuiltinType::Long: + TypeName = "__rvv_int64m1_t"; + break; + case BuiltinType::ULong: + TypeName = "__rvv_uint64m1_t"; + break; + case BuiltinType::Half: + TypeName = "__rvv_float16m1_t"; + break; + case BuiltinType::Float: + TypeName = "__rvv_float32m1_t"; + break; + case BuiltinType::Double: + TypeName = "__rvv_float64m1_t"; + break; + default: + llvm_unreachable("unexpected element type for fixed-length RVV vector!"); + } + + unsigned VecSizeInBits = getASTContext().getTypeInfo(T).Width; + + Out << "9__RVV_VLSI" << 'u' << TypeName.size() << TypeName << "Lj" + << VecSizeInBits << "EE"; +} + +void CXXNameMangler::mangleRISCVFixedRVVVectorType( + const DependentVectorType *T) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, + "cannot mangle this dependent fixed-length RVV vector type yet"); + Diags.Report(T->getAttributeLoc(), DiagID); +} + // GNU extension: vector types // ::= // ::= Dv _ @@ -3836,6 +3900,9 @@ T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { mangleAArch64FixedSveVectorType(T); return; + } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) { + mangleRISCVFixedRVVVectorType(T); + return; } Out << "Dv" << T->getNumElements() << '_'; if (T->getVectorKind() == VectorType::AltiVecPixel) @@ -3862,6 +3929,9 @@ T->getVectorKind() == VectorType::SveFixedLengthPredicateVector) { mangleAArch64FixedSveVectorType(T); return; + } else if (T->getVectorKind() == VectorType::RVVFixedLengthDataVector) { + mangleRISCVFixedRVVVectorType(T); + return; } Out << "Dv"; diff --git a/clang/lib/AST/JSONNodeDumper.cpp b/clang/lib/AST/JSONNodeDumper.cpp --- a/clang/lib/AST/JSONNodeDumper.cpp +++ b/clang/lib/AST/JSONNodeDumper.cpp @@ -662,6 +662,9 @@ case VectorType::SveFixedLengthPredicateVector: JOS.attribute("vectorKind", "fixed-length sve predicate vector"); break; + case VectorType::RVVFixedLengthDataVector: + JOS.attribute("vectorKind", "fixed-length rvv data vector"); + break; } } diff --git a/clang/lib/AST/TextNodeDumper.cpp b/clang/lib/AST/TextNodeDumper.cpp --- a/clang/lib/AST/TextNodeDumper.cpp +++ b/clang/lib/AST/TextNodeDumper.cpp @@ -1495,6 +1495,9 @@ case VectorType::SveFixedLengthPredicateVector: OS << " fixed-length sve predicate vector"; break; + case VectorType::RVVFixedLengthDataVector: + OS << " fixed-length rvv data vector"; + break; } OS << " " << T->getNumElements(); } diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/TargetParser/RISCVTargetParser.h" #include #include #include @@ -1928,6 +1929,11 @@ (VT->getKind() >= BuiltinType::SveInt8 && VT->getKind() <= BuiltinType::SveUint64); } + if (CanonicalType->isRVVVLSBuiltinType()) { + const auto *VT = cast(CanonicalType); + return (VT->getKind() >= BuiltinType::RvvInt8mf8 && + VT->getKind() <= BuiltinType::RvvUint64m8); + } return isIntegerType(); } @@ -2425,6 +2431,28 @@ return Ctx.getBuiltinVectorTypeInfo(BTy).ElementType; } +bool Type::isRVVVLSBuiltinType() const { + if (const BuiltinType *BT = getAs()) { + switch (BT->getKind()) { + // FIXME: Support more than LMUL 1. 
diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp
--- a/clang/lib/AST/TypePrinter.cpp
+++ b/clang/lib/AST/TypePrinter.cpp
@@ -687,6 +687,19 @@
     else
       OS << T->getNumElements();
 
     OS << " * sizeof(";
     print(T->getElementType(), OS, StringRef());
     // Multiply by 8 for the number of bits.
     OS << ") * 8))) ";
     printBefore(T->getElementType(), OS);
     break;
+  case VectorType::RVVFixedLengthDataVector:
+    // FIXME: We prefer to print the size directly here, but have no way
+    // to get the size of the type.
+    OS << "__attribute__((__riscv_rvv_vector_bits__(";
+
+    OS << T->getNumElements();
+
+    OS << " * sizeof(";
+    print(T->getElementType(), OS, StringRef());
+    // Multiply by 8 for the number of bits.
+    OS << ") * 8))) ";
+    printBefore(T->getElementType(), OS);
+    break;
@@ -759,6 +772,20 @@
     OS << "))) ";
     printBefore(T->getElementType(), OS);
     break;
+  case VectorType::RVVFixedLengthDataVector:
+    // FIXME: We prefer to print the size directly here, but have no way
+    // to get the size of the type.
+    OS << "__attribute__((__riscv_rvv_vector_bits__(";
+    if (T->getSizeExpr()) {
+      T->getSizeExpr()->printPretty(OS, nullptr, Policy);
+      OS << " * sizeof(";
+      print(T->getElementType(), OS, StringRef());
+      // Multiply by 8 for the number of bits.
+      OS << ") * 8";
+    }
+    OS << "))) ";
+    printBefore(T->getElementType(), OS);
+    break;
   }
 }
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -200,6 +200,11 @@
     // Currently we support the v0.11 RISC-V V intrinsics.
     Builder.defineMacro("__riscv_v_intrinsic", Twine(getVersionValue(0, 11)));
   }
+
+  auto VScale = getVScaleRange(Opts);
+  if (VScale && VScale->first && VScale->first == VScale->second)
+    Builder.defineMacro("__RISCV_RVV_VLEN_BITS",
+                        Twine(VScale->first * llvm::RISCV::RVVBitsPerBlock));
 }
 
 static constexpr Builtin::Info BuiltinInfo[] = {
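The predefine follows directly from the pinned vscale range: when vscale_min == vscale_max == K, the macro expands to K * 64. A sketch (hypothetical invocation; the driver flag spelling follows the '-mrvv-vector-bits=' wording used by the new diagnostics):

    // $ clang --target=riscv64 -march=rv64gcv -mrvv-vector-bits=256 ...
    // => vscale is fixed at 256 / 64 = 4, so the compiler predefines
    //    __RISCV_RVV_VLEN_BITS as 256.
    #include <riscv_vector.h>

    #if defined(__RISCV_RVV_VLEN_BITS)
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));
    #else
    #error "vector length not fixed; riscv_rvv_vector_bits is unavailable"
    #endif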
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -33,6 +33,7 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
 #include "llvm/TargetParser/Triple.h"
 #include <algorithm>
 
@@ -11108,6 +11109,8 @@
                                                   CharUnits Field1Off,
                                                   llvm::Type *Field2Ty,
                                                   CharUnits Field2Off) const;
+
+  ABIArgInfo coerceVLSVector(QualType Ty) const;
 };
 } // end anonymous namespace
 
@@ -11351,6 +11354,44 @@
   return ABIArgInfo::getCoerceAndExpand(CoerceToType, UnpaddedCoerceToType);
 }
 
+// Fixed-length RVV vectors are represented as scalable vectors in function
+// args/return and must be coerced from fixed vectors.
+ABIArgInfo RISCVABIInfo::coerceVLSVector(QualType Ty) const {
+  assert(Ty->isVectorType() && "expected vector type!");
+
+  const auto *VT = Ty->castAs<VectorType>();
+  assert(VT->getVectorKind() == VectorType::RVVFixedLengthDataVector &&
+         "Unexpected vector kind");
+
+  assert(VT->getElementType()->isBuiltinType() && "expected builtin type!");
+
+  const auto *BT = VT->getElementType()->castAs<BuiltinType>();
+  llvm::ScalableVectorType *ResType = nullptr;
+  unsigned EltSize = getContext().getTypeSize(BT);
+  switch (BT->getKind()) {
+  default:
+    llvm_unreachable("unexpected builtin type for RISC-V vector!");
+  case BuiltinType::SChar:
+  case BuiltinType::UChar:
+  case BuiltinType::Short:
+  case BuiltinType::UShort:
+  case BuiltinType::Int:
+  case BuiltinType::UInt:
+  case BuiltinType::Long:
+  case BuiltinType::ULong:
+  case BuiltinType::LongLong:
+  case BuiltinType::ULongLong:
+  case BuiltinType::Half:
+  case BuiltinType::Float:
+  case BuiltinType::Double:
+    ResType =
+        llvm::ScalableVectorType::get(CGT.ConvertType(VT->getElementType()),
+                                      llvm::RISCV::RVVBitsPerBlock / EltSize);
+    break;
+  }
+  return ABIArgInfo::getDirect(ResType);
+}
+
 ABIArgInfo RISCVABIInfo::classifyArgumentType(QualType Ty, bool IsFixed,
                                               int &ArgGPRsLeft,
                                               int &ArgFPRsLeft) const {
@@ -11446,6 +11487,10 @@
     return ABIArgInfo::getDirect();
   }
 
+  if (const VectorType *VT = Ty->getAs<VectorType>())
+    if (VT->getVectorKind() == VectorType::RVVFixedLengthDataVector)
+      return coerceVLSVector(Ty);
+
   // Aggregates which are <= 2*XLen will be passed in registers if possible,
   // so coerce to integers.
   if (Size <= 2 * XLen) {
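To see the coercion in effect, a sketch of the expected lowering (hypothetical example; assumes a 128-bit configuration, and the scalable element count follows from RVVBitsPerBlock / EltSize = 64 / 32 = 2):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    // In memory this type is a flat <4 x i32>, but at call boundaries it is
    // coerced to the scalable container, so it is passed and returned in
    // vector registers exactly like vint32m1_t, roughly:
    //   define <vscale x 2 x i32> @pass_through(<vscale x 2 x i32> %v.coerce)
    fixed_vint32m1_t pass_through(fixed_vint32m1_t v) { return v; }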
diff --git a/clang/lib/Sema/SemaCast.cpp b/clang/lib/Sema/SemaCast.cpp
--- a/clang/lib/Sema/SemaCast.cpp
+++ b/clang/lib/Sema/SemaCast.cpp
@@ -2350,6 +2350,12 @@
     return TC_Success;
   }
 
+  // Allow bitcasting between RVV VLATs and VLSTs, and vice-versa.
+  if (Self.isValidRVVBitcast(SrcType, DestType)) {
+    Kind = CK_BitCast;
+    return TC_Success;
+  }
+
   // The non-vector type, if any, must have integral type.  This is
   // the same rule that C vector casts use; note, however, that enum
   // types are not integral in C++.
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -13994,6 +13994,13 @@
             QualType(Source, 0))))
       return;
 
+    if (Target->isRVVVLSBuiltinType() &&
+        (S.Context.areCompatibleRVVTypes(QualType(Target, 0),
+                                         QualType(Source, 0)) ||
+         S.Context.areLaxCompatibleRVVTypes(QualType(Target, 0),
+                                            QualType(Source, 0))))
+      return;
+
    if (!isa<VectorType>(Target)) {
      if (S.SourceMgr.isInSystemMacro(CC))
        return;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7977,6 +7977,28 @@
          ValidScalableConversion(destTy, srcTy);
 }
 
+/// Are the two types RVV-bitcast-compatible types? I.e. is bitcasting from the
+/// first RVV type (e.g. an RVV scalable type) to the second type (e.g. an RVV
+/// VLS type) allowed?
+///
+/// This will also return false if the two given types do not make sense from
+/// the perspective of RVV bitcasts.
+bool Sema::isValidRVVBitcast(QualType srcTy, QualType destTy) {
+  assert(srcTy->isVectorType() || destTy->isVectorType());
+
+  auto ValidScalableConversion = [](QualType FirstType, QualType SecondType) {
+    if (!FirstType->isRVVSizelessBuiltinType())
+      return false;
+
+    const auto *VecTy = SecondType->getAs<VectorType>();
+    return VecTy &&
+           VecTy->getVectorKind() == VectorType::RVVFixedLengthDataVector;
+  };
+
+  return ValidScalableConversion(srcTy, destTy) ||
+         ValidScalableConversion(destTy, srcTy);
+}
+
 /// Are the two types matrix types and do they have the same dimensions i.e.
 /// do they have the same number of rows and the same number of columns?
 bool Sema::areMatrixTypesOfTheSameDimension(QualType srcTy, QualType destTy) {
@@ -9909,6 +9931,16 @@
     return Compatible;
   }
 
+  // Allow assignments between fixed-length and sizeless RVV vectors.
+  if ((LHSType->isRVVSizelessBuiltinType() && RHSType->isVectorType()) ||
+      (LHSType->isVectorType() && RHSType->isRVVSizelessBuiltinType())) {
+    if (Context.areCompatibleRVVTypes(LHSType, RHSType) ||
+        Context.areLaxCompatibleRVVTypes(LHSType, RHSType)) {
+      Kind = CK_BitCast;
+      return Compatible;
+    }
+  }
+
   return Incompatible;
 }
 
@@ -10765,18 +10797,30 @@
     }
   }
 
-  // Expressions containing fixed-length and sizeless SVE vectors are invalid
-  // since the ambiguity can affect the ABI.
-  auto IsSveConversion = [](QualType FirstType, QualType SecondType) {
+  // Expressions containing fixed-length and sizeless SVE/RVV vectors are
+  // invalid since the ambiguity can affect the ABI.
+  auto IsSveRVVConversion = [](QualType FirstType, QualType SecondType,
+                               unsigned &SVEorRVV) {
     const VectorType *VecType = SecondType->getAs<VectorType>();
-    return FirstType->isSizelessBuiltinType() && VecType &&
-           (VecType->getVectorKind() == VectorType::SveFixedLengthDataVector ||
-            VecType->getVectorKind() ==
-                VectorType::SveFixedLengthPredicateVector);
+    SVEorRVV = 0;
+    if (FirstType->isSizelessBuiltinType() && VecType) {
+      if (VecType->getVectorKind() == VectorType::SveFixedLengthDataVector ||
+          VecType->getVectorKind() ==
+              VectorType::SveFixedLengthPredicateVector)
+        return true;
+      if (VecType->getVectorKind() == VectorType::RVVFixedLengthDataVector) {
+        SVEorRVV = 1;
+        return true;
+      }
+    }
+
+    return false;
   };
 
-  if (IsSveConversion(LHSType, RHSType) || IsSveConversion(RHSType, LHSType)) {
-    Diag(Loc, diag::err_typecheck_sve_ambiguous) << LHSType << RHSType;
+  unsigned SVEorRVV;
+  if (IsSveRVVConversion(LHSType, RHSType, SVEorRVV) ||
+      IsSveRVVConversion(RHSType, LHSType, SVEorRVV)) {
+    Diag(Loc, diag::err_typecheck_sve_rvv_ambiguous)
+        << SVEorRVV << LHSType << RHSType;
     return QualType();
   }
 
@@ -10788,12 +10832,21 @@
     const VectorType *SecondVecType = SecondType->getAs<VectorType>();
 
     SVEorRVV = 0;
-    if (FirstVecType && SecondVecType)
-      return FirstVecType->getVectorKind() == VectorType::GenericVector &&
-             (SecondVecType->getVectorKind() ==
-                  VectorType::SveFixedLengthDataVector ||
-              SecondVecType->getVectorKind() ==
-                  VectorType::SveFixedLengthPredicateVector);
+    if (FirstVecType && SecondVecType) {
+      if (FirstVecType->getVectorKind() == VectorType::GenericVector) {
+        if (SecondVecType->getVectorKind() ==
+                VectorType::SveFixedLengthDataVector ||
+            SecondVecType->getVectorKind() ==
+                VectorType::SveFixedLengthPredicateVector)
+          return true;
+        if (SecondVecType->getVectorKind() ==
+                VectorType::RVVFixedLengthDataVector) {
+          SVEorRVV = 1;
+          return true;
+        }
+      }
+      return false;
+    }
 
     if (SecondVecType &&
         SecondVecType->getVectorKind() == VectorType::GenericVector) {
@@ -10808,7 +10861,6 @@
     return false;
   };
 
-  unsigned SVEorRVV;
   if (IsSveRVVGnuConversion(LHSType, RHSType, SVEorRVV) ||
       IsSveRVVGnuConversion(RHSType, LHSType, SVEorRVV)) {
     Diag(Loc, diag::err_typecheck_sve_rvv_gnu_ambiguous)
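Taken together, the Sema changes above give fixed/sizeless interop the following shape (sketch, not from the patch; assumes __RISCV_RVV_VLEN_BITS == 128):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    void ok(vint32m1_t s, fixed_vint32m1_t f) {
      f = s;             // implicit conversion, lowered as CK_BitCast
      s = (vint32m1_t)f; // explicit cast accepted via isValidRVVBitcast
    }

    vint32m1_t bad(vint32m1_t s, fixed_vint32m1_t f) {
      return s + f; // error: cannot combine fixed-length and sizeless RVV
                    // vectors in expression, result is ambiguous
    }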
diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp
--- a/clang/lib/Sema/SemaExprCXX.cpp
+++ b/clang/lib/Sema/SemaExprCXX.cpp
@@ -4571,6 +4571,7 @@
     break;
 
   case ICK_SVE_Vector_Conversion:
+  case ICK_RVV_Vector_Conversion:
     From = ImpCastExprToType(From, ToType, CK_BitCast, VK_PRValue,
                              /*BasePath=*/nullptr, CCK)
                .get();
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -141,6 +141,7 @@
     ICR_Conversion,
     ICR_Conversion,
     ICR_Conversion,
+    ICR_Conversion,
     ICR_OCL_Scalar_Widening,
     ICR_Complex_Real_Conversion,
     ICR_Conversion,
@@ -183,6 +184,7 @@
     "Derived-to-base conversion",
     "Vector conversion",
     "SVE Vector conversion",
+    "RVV Vector conversion",
     "Vector splat",
     "Complex-real conversion",
     "Block Pointer conversion",
@@ -1761,6 +1763,14 @@
       return true;
     }
 
+  if (ToType->isRVVSizelessBuiltinType() ||
+      FromType->isRVVSizelessBuiltinType())
+    if (S.Context.areCompatibleRVVTypes(FromType, ToType) ||
+        S.Context.areLaxCompatibleRVVTypes(FromType, ToType)) {
+      ICK = ICK_RVV_Vector_Conversion;
+      return true;
+    }
+
   // We can perform the conversion between vector types in the following cases:
   // 1)vector types are equivalent AltiVec and GCC vector types
   // 2)lax vector conversions are permitted and the vector types are of the
@@ -4327,6 +4337,20 @@
                : ImplicitConversionSequence::Worse;
   }
 
+  if (SCS1.Second == ICK_RVV_Vector_Conversion &&
+      SCS2.Second == ICK_RVV_Vector_Conversion) {
+    bool SCS1IsCompatibleRVVVectorConversion =
+        S.Context.areCompatibleRVVTypes(SCS1.getFromType(), SCS1.getToType(2));
+    bool SCS2IsCompatibleRVVVectorConversion =
+        S.Context.areCompatibleRVVTypes(SCS2.getFromType(), SCS2.getToType(2));
+
+    if (SCS1IsCompatibleRVVVectorConversion !=
+        SCS2IsCompatibleRVVVectorConversion)
+      return SCS1IsCompatibleRVVVectorConversion
+                 ? ImplicitConversionSequence::Better
+                 : ImplicitConversionSequence::Worse;
+  }
+
   return ImplicitConversionSequence::Indistinguishable;
 }
 
@@ -5765,6 +5789,7 @@
   case ICK_Derived_To_Base:
   case ICK_Vector_Conversion:
   case ICK_SVE_Vector_Conversion:
+  case ICK_RVV_Vector_Conversion:
   case ICK_Vector_Splat:
   case ICK_Complex_Real:
   case ICK_Block_Pointer_Conversion:
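In C++, the new conversion kind ranks as an ordinary conversion (ICR_Conversion), and the tie-break above prefers an exactly compatible RVV conversion over a merely lax-compatible one. A sketch (hypothetical C++ example; assumes __RISCV_RVV_VLEN_BITS == 128 and the default lax integer vector conversions):

    #include <riscv_vector.h>

    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));
    typedef vuint32m1_t fixed_vuint32m1_t
        __attribute__((riscv_rvv_vector_bits(128)));

    void g(fixed_vint32m1_t);   // #1: compatible with vint32m1_t
    void g(fixed_vuint32m1_t);  // #2: only lax-compatible with vint32m1_t

    void h(vint32m1_t s) {
      g(s); // calls #1: both candidates use ICK_RVV_Vector_Conversion, and the
            // areCompatibleRVVTypes sequence is ranked Better than the lax one
    }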
+/// HandleRISCVRVVVectorBitsTypeAttr - The "riscv_rvv_vector_bits" attribute is
+/// used to create fixed-length versions of sizeless RVV types such as
+/// vint8m1_t.
+static void HandleRISCVRVVVectorBitsTypeAttr(QualType &CurType,
+                                             ParsedAttr &Attr, Sema &S) {
+  // Target must have vector extension.
+  if (!S.Context.getTargetInfo().hasFeature("zve32x")) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
+        << Attr << "'zve32x'";
+    Attr.setInvalid();
+    return;
+  }
+
+  auto VScale = S.Context.getTargetInfo().getVScaleRange(S.getLangOpts());
+  if (!VScale || !VScale->first || VScale->first != VScale->second) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_riscv_rvv_bits_unsupported)
+        << Attr;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Check the attribute arguments.
+  if (Attr.getNumArgs() != 1) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_wrong_number_arguments)
+        << Attr << 1;
+    Attr.setInvalid();
+    return;
+  }
+
+  // The vector size must be an integer constant expression.
+  llvm::APSInt RVVVectorSizeInBits(32);
+  if (!verifyValidIntegerConstantExpr(S, Attr, RVVVectorSizeInBits))
+    return;
+
+  unsigned VecSize = static_cast<unsigned>(RVVVectorSizeInBits.getZExtValue());
+
+  // The attribute vector size must match -mrvv-vector-bits.
+  // FIXME: Add support for types with LMUL!=1. Need to make sure size passed
+  // to attribute is equal to LMUL*VScaleMin*RVVBitsPerBlock.
+  if (VecSize != VScale->first * llvm::RISCV::RVVBitsPerBlock) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_bad_rvv_vector_size)
+        << VecSize << VScale->first * llvm::RISCV::RVVBitsPerBlock;
+    Attr.setInvalid();
+    return;
+  }
+
+  // Attribute can only be attached to a single RVV vector type.
+  if (!CurType->isRVVVLSBuiltinType()) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_invalid_rvv_type)
+        << Attr << CurType;
+    Attr.setInvalid();
+    return;
+  }
+
+  QualType EltType = CurType->getRVVEltType(S.Context);
+  unsigned TypeSize = S.Context.getTypeSize(EltType);
+  VectorType::VectorKind VecKind = VectorType::RVVFixedLengthDataVector;
+  VecSize /= TypeSize;
+  CurType = S.Context.getVectorType(EltType, VecSize, VecKind);
+}
+
 /// Handle OpenCL Access Qualifier Attribute.
static void HandleOpenCLAccessAttr(QualType &CurType, const ParsedAttr &Attr, Sema &S) { @@ -8542,6 +8604,10 @@ attr.setUsedAsTypeAttr(); break; } + case ParsedAttr::AT_RISCVRVVVectorBits: + HandleRISCVRVVVectorBitsTypeAttr(type, attr, state.getSema()); + attr.setUsedAsTypeAttr(); + break; case ParsedAttr::AT_OpenCLAccess: HandleOpenCLAccessAttr(type, attr, state.getSema()); attr.setUsedAsTypeAttr(); diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-bitcast.c @@ -0,0 +1,140 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256 + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +#define DEFINE_STRUCT(ty) \ + struct struct_##ty { \ + fixed_##ty##_t x, y[3]; \ + } struct_##ty; + +DEFINE_STRUCT(int64m1) +DEFINE_STRUCT(float64m1) + +//===----------------------------------------------------------------------===// +// int64 +//===----------------------------------------------------------------------===// + +// CHECK-64-LABEL: @read_int64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-64-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v1i64( undef, <1 x i64> [[TMP0]], i64 0) +// CHECK-64-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-128-LABEL: @read_int64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1i64.v2i64( undef, <2 x i64> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-256-LABEL: @read_int64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[Y]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = tail call 
@llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[TMP0]], i64 0) +// CHECK-256-NEXT: ret [[CASTSCALABLESVE]] +// +vint64m1_t read_int64m1(struct struct_int64m1 *s) { + return s->y[0]; +} + +// CHECK-64-LABEL: @write_int64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: store <1 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: ret void +// +// CHECK-128-LABEL: @write_int64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x i64> @llvm.vector.extract.v2i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: store <2 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_int64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[X:%.*]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_INT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: store <4 x i64> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: ret void +// +void write_int64m1(struct struct_int64m1 *s, vint64m1_t x) { + s->y[0] = x; +} + +//===----------------------------------------------------------------------===// +// float64 +//===----------------------------------------------------------------------===// + +// CHECK-64-LABEL: @read_float64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: [[TMP0:%.*]] = load <1 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v1f64( undef, <1 x double> [[TMP0]], i64 0) +// CHECK-64-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-128-LABEL: @read_float64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: [[TMP0:%.*]] = load <2 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v2f64( undef, <2 x double> [[TMP0]], i64 0) +// CHECK-128-NEXT: ret [[CASTSCALABLESVE]] +// +// CHECK-256-LABEL: @read_float64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: [[TMP0:%.*]] = load <4 x double>, ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[TMP0]], i64 0) +// CHECK-256-NEXT: ret [[CASTSCALABLESVE]] +// +vfloat64m1_t read_float64m1(struct struct_float64m1 *s) { + return s->y[0]; +} + +// CHECK-64-LABEL: @write_float64m1( +// CHECK-64-NEXT: entry: +// CHECK-64-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x double> @llvm.vector.extract.v1f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-64-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-64-NEXT: store <1 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-64-NEXT: ret void +// +// 
CHECK-128-LABEL: @write_float64m1( +// CHECK-128-NEXT: entry: +// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <2 x double> @llvm.vector.extract.v2f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-128-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-128-NEXT: store <2 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-128-NEXT: ret void +// +// CHECK-256-LABEL: @write_float64m1( +// CHECK-256-NEXT: entry: +// CHECK-256-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[X:%.*]], i64 0) +// CHECK-256-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_STRUCT_FLOAT64M1:%.*]], ptr [[S:%.*]], i64 0, i32 1 +// CHECK-256-NEXT: store <4 x double> [[CASTFIXEDSVE]], ptr [[Y]], align 8, !tbaa [[TBAA4]] +// CHECK-256-NEXT: ret void +// +void write_float64m1(struct struct_float64m1 *s, vfloat64m1_t x) { + s->y[0] = x; +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-call.c @@ -0,0 +1,117 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +//===----------------------------------------------------------------------===// +// Test caller/callee with VLST <-> VLAT +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @sizeless_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X:%.*]] +// +vint32m1_t sizeless_callee(vint32m1_t x) { + return x; +} + +// CHECK-LABEL: @fixed_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X_COERCE:%.*]] +// +fixed_int32m1_t fixed_caller(fixed_int32m1_t x) { + return sizeless_callee(x); +} + +// CHECK-LABEL: @fixed_callee( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[X_COERCE:%.*]] +// +fixed_int32m1_t fixed_callee(fixed_int32m1_t x) { + return x; +} + +// CHECK-LABEL: @sizeless_caller( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[COERCE1:%.*]] = alloca <8 x i32>, align 8 +// CHECK-NEXT: store [[X:%.*]], ptr [[COERCE1]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr [[COERCE1]], align 8, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[CASTSCALABLESVE2:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TMP0]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE2]] +// +vint32m1_t sizeless_caller(vint32m1_t x) { + return fixed_callee(x); +} + +//===----------------------------------------------------------------------===// +// fixed, fixed +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_ff(fixed_int32m1_t op1, fixed_int32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_ff( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1_COERCE:%.*]], [[OP2_COERCE:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_ff(fixed_float64m1_t op1, fixed_float64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} + +//===----------------------------------------------------------------------===// +// fixed, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_fs(fixed_int32m1_t op1, vint32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_fs( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1_COERCE:%.*]], [[OP2:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_fs(fixed_float64m1_t op1, vfloat64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} + +//===----------------------------------------------------------------------===// +// scalable, scalable +//===----------------------------------------------------------------------===// + +// CHECK-LABEL: @call_int32_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vadd.nxv2i32.nxv2i32.i64( poison, [[OP1:%.*]], [[OP2:%.*]], i64 8) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_int32m1_t call_int32_ss(vint32m1_t op1, vint32m1_t op2) { + return __riscv_vadd(op1, op2, N/32); +} + +// CHECK-LABEL: @call_float64_ss( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.riscv.vfadd.nxv1f64.nxv1f64.i64( poison, [[OP1:%.*]], [[OP2:%.*]], i64 4) +// CHECK-NEXT: ret [[TMP0]] +// +fixed_float64m1_t call_float64_ss(vfloat64m1_t op1, vfloat64m1_t op2) { + return __riscv_vfadd(op1, op2, N/64); +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-cast.c @@ -0,0 +1,110 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef int32_t gnu_int32m1_t __attribute__((vector_size(N / 8))); + +// CHECK-LABEL: @to_vint32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// +vint32m1_t to_vint32m1_t(fixed_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vint32m1_t( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: ret [[TYPE:%.*]] +// +fixed_int32m1_t from_vint32m1_t(vint32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_vfloat64m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE_COERCE:%.*]] +// +vfloat64m1_t to_vfloat64m1_t(fixed_float64m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vfloat64m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret [[TYPE:%.*]] +// +fixed_float64m1_t from_vfloat64m1_t(vfloat64m1_t type) { + return type; +} + +// CHECK-LABEL: @lax_cast( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SAVED_VALUE:%.*]] = alloca <8 x i32>, align 32 +// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4:![0-9]+]] +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[SAVED_VALUE]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret [[TMP0]] +// +vint64m1_t lax_cast(fixed_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_vint32m1_t__from_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +vint32m1_t to_vint32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_vint32m1_t__to_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[CASTFIXEDSVE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret void +// +gnu_int32m1_t from_vint32m1_t__to_gnu_int32m1_t(vint32m1_t type) { + return type; +} + +// CHECK-LABEL: @to_fixed_int32m1_t__from_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = load <8 x i32>, ptr [[TMP0:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = tail call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[TYPE]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t to_fixed_int32m1_t__from_gnu_int32m1_t(gnu_int32m1_t type) { + return type; +} + +// CHECK-LABEL: @from_fixed_int32m1_t__to_gnu_int32m1_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TYPE:%.*]] = tail call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[TYPE_COERCE:%.*]], i64 0) +// CHECK-NEXT: store <8 x i32> [[TYPE]], ptr [[AGG_RESULT:%.*]], align 32, !tbaa [[TBAA4]] +// CHECK-NEXT: ret void +// +gnu_int32m1_t from_fixed_int32m1_t__to_gnu_int32m1_t(fixed_int32m1_t type) { + return type; +} diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-codegen.c @@ -0,0 +1,79 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t 
vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+fixed_int32m1_t global_vec;
+
+// CHECK-LABEL: @test_ptr_to_global(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[GLOBAL_VEC_PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr @global_vec, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[GLOBAL_VEC_PTR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[TMP0]], align 8
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t test_ptr_to_global() {
+  fixed_int32m1_t *global_vec_ptr;
+  global_vec_ptr = &global_vec;
+  return *global_vec_ptr;
+}
+
+//
+// Test casting pointer from fixed-length array to scalable vector.
+// CHECK-LABEL: @array_arg(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[ARR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT: store ptr [[ARR:%.*]], ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[ARR_ADDR]], align 8
+// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds <8 x i32>, ptr [[TMP0]], i64 0
+// CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr [[ARRAYIDX]], align 8
+// CHECK-NEXT: store <8 x i32> [[TMP1]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t array_arg(fixed_int32m1_t arr[]) {
+  return arr[0];
+}
+
+// CHECK-LABEL: @test_cast(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <8 x i32>, align 8
+// CHECK-NEXT: [[VEC_ADDR:%.*]] = alloca <vscale x 2 x i32>, align 4
+// CHECK-NEXT: store <vscale x 2 x i32> [[VEC:%.*]], ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load <8 x i32>, ptr @global_vec, align 8
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP0]], i64 0)
+// CHECK-NEXT: [[TMP1:%.*]] = load <vscale x 2 x i32>, ptr [[VEC_ADDR]], align 4
+// CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 2 x i32> @llvm.riscv.vadd.nxv2i32.nxv2i32.i64(<vscale x 2 x i32> poison, <vscale x 2 x i32> [[CASTSCALABLESVE]], <vscale x 2 x i32> [[TMP1]], i64 8)
+// CHECK-NEXT: [[CASTFIXEDSVE:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[TMP2]], i64 0)
+// CHECK-NEXT: store <8 x i32> [[CASTFIXEDSVE]], ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr [[RETVAL]], align 8
+// CHECK-NEXT: [[CASTSCALABLESVE1:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[TMP3]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE1]]
+//
+fixed_int32m1_t test_cast(vint32m1_t vec) {
+  return __riscv_vadd(global_vec, vec, N/32);
+}
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-globals.c
@@ -0,0 +1,60 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -O1 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+fixed_int64m1_t global_i64;
+
+//===----------------------------------------------------------------------===//
+// WRITES
+//===----------------------------------------------------------------------===//
+
+// CHECK-128-LABEL: @write_global_i64(
+// CHECK-128-NEXT: entry:
+// CHECK-128-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <1 x i64> @llvm.vector.extract.v1i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
+// CHECK-128-NEXT: store <1 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 8, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-128-NEXT: ret void
+//
+// CHECK-512-LABEL: @write_global_i64(
+// CHECK-512-NEXT: entry:
+// CHECK-512-NEXT: [[CASTFIXEDSVE:%.*]] = tail call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[V:%.*]], i64 0)
+// CHECK-512-NEXT: store <4 x i64> [[CASTFIXEDSVE]], ptr @global_i64, align 8, !tbaa [[TBAA4:![0-9]+]]
+// CHECK-512-NEXT: ret void
+//
+void write_global_i64(vint64m1_t v) { global_i64 = v; }
+
+//===----------------------------------------------------------------------===//
+// READS
+//===----------------------------------------------------------------------===//
+
+// CHECK-128-LABEL: @read_global_i64(
+// CHECK-128-NEXT: entry:
+// CHECK-128-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA4]]
+// CHECK-128-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v1i64(<vscale x 1 x i64> undef, <1 x i64> [[TMP0]], i64 0)
+// CHECK-128-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+// CHECK-512-LABEL: @read_global_i64(
+// CHECK-512-NEXT: entry:
+// CHECK-512-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr @global_i64, align 8, !tbaa [[TBAA4]]
+// CHECK-512-NEXT: [[CASTSCALABLESVE:%.*]] = tail call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[TMP0]], i64 0)
+// CHECK-512-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+vint64m1_t read_global_i64() { return global_i64; }
diff --git a/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/attr-riscv-rvv-vector-bits-types.c
@@ -0,0 +1,485 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-1024
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+// Define valid fixed-width RVV types
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+//===----------------------------------------------------------------------===//
+// Structs and unions
+//===----------------------------------------------------------------------===//
+#define DEFINE_STRUCT(ty) \
+  struct struct_##ty {    \
+    fixed_##ty##_t x;     \
+  } struct_##ty;
+
+#define DEFINE_UNION(ty) \
+  union union_##ty {     \
+    fixed_##ty##_t x;    \
+  } union_##ty;
+
+DEFINE_STRUCT(int8m1)
+DEFINE_STRUCT(int16m1)
+DEFINE_STRUCT(int32m1)
+DEFINE_STRUCT(int64m1)
+DEFINE_STRUCT(uint8m1)
+DEFINE_STRUCT(uint16m1)
+DEFINE_STRUCT(uint32m1)
+DEFINE_STRUCT(uint64m1)
+DEFINE_STRUCT(float32m1)
+DEFINE_STRUCT(float64m1)
+
+DEFINE_UNION(int8m1)
+DEFINE_UNION(int16m1)
+DEFINE_UNION(int32m1)
+DEFINE_UNION(int64m1)
+DEFINE_UNION(uint8m1)
+DEFINE_UNION(uint16m1)
+DEFINE_UNION(uint32m1)
+DEFINE_UNION(uint64m1)
+DEFINE_UNION(float32m1)
+DEFINE_UNION(float64m1)
+
+//===----------------------------------------------------------------------===//
+// Global variables
+//===----------------------------------------------------------------------===//
+fixed_int8m1_t global_i8;
+fixed_int16m1_t global_i16;
+fixed_int32m1_t global_i32;
+fixed_int64m1_t global_i64;
+
+fixed_uint8m1_t global_u8;
+fixed_uint16m1_t global_u16;
+fixed_uint32m1_t global_u32;
+fixed_uint64m1_t global_u64;
+
+fixed_float32m1_t global_f32;
+fixed_float64m1_t global_f64;
+
+//===----------------------------------------------------------------------===//
+// Global arrays
+//===----------------------------------------------------------------------===//
+fixed_int8m1_t global_arr_i8[3];
+fixed_int16m1_t global_arr_i16[3];
+fixed_int32m1_t global_arr_i32[3];
+fixed_int64m1_t global_arr_i64[3];
+
+fixed_uint8m1_t global_arr_u8[3];
+fixed_uint16m1_t global_arr_u16[3];
+fixed_uint32m1_t global_arr_u32[3];
+fixed_uint64m1_t global_arr_u64[3];
+
+fixed_float32m1_t global_arr_f32[3];
+fixed_float64m1_t global_arr_f64[3];
+
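A minimal usage sketch of the pattern these declarations exercise, assuming a build with -mrvv-vector-bits=256 (the accumulate/acc names and the 256-bit guard are illustrative, not part of this patch); the intrinsic call mirrors test_cast earlier in this diff:

    #include <riscv_vector.h>

    #if defined(__RISCV_RVV_VLEN_BITS) && __RISCV_RVV_VLEN_BITS == 256
    // Fixed-length alias: unlike the sizeless vint32m1_t it is derived
    // from, it may appear in structs, unions, arrays, and globals.
    typedef vint32m1_t fixed_vint32m1_t
        __attribute__((riscv_rvv_vector_bits(__RISCV_RVV_VLEN_BITS)));

    static fixed_vint32m1_t acc; // global storage of a vector value

    // Fixed-length and sizeless values convert implicitly, so the
    // fixed-length global can be passed straight to a scalable intrinsic.
    void accumulate(vint32m1_t v) {
      acc = __riscv_vadd(acc, v, __RISCV_RVV_VLEN_BITS / 32);
    }
    #endif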
+//===----------------------------------------------------------------------===//
+// Locals
+//===----------------------------------------------------------------------===//
+void f() {
+  // Variables
+  fixed_int8m1_t local_i8;
+  fixed_int16m1_t local_i16;
+  fixed_int32m1_t local_i32;
+  fixed_int64m1_t local_i64;
+  fixed_uint8m1_t local_u8;
+  fixed_uint16m1_t local_u16;
+  fixed_uint32m1_t local_u32;
+  fixed_uint64m1_t local_u64;
+  fixed_float32m1_t local_f32;
+  fixed_float64m1_t local_f64;
+
+  // Arrays
+  fixed_int8m1_t local_arr_i8[3];
+  fixed_int16m1_t local_arr_i16[3];
+  fixed_int32m1_t local_arr_i32[3];
+  fixed_int64m1_t local_arr_i64[3];
+  fixed_uint8m1_t local_arr_u8[3];
+  fixed_uint16m1_t local_arr_u16[3];
+  fixed_uint32m1_t local_arr_u32[3];
+  fixed_uint64m1_t local_arr_u64[3];
+  fixed_float32m1_t local_arr_f32[3];
+  fixed_float64m1_t local_arr_f64[3];
+}
+
+//===----------------------------------------------------------------------===//
+// Structs and unions
+//===----------------------------------------------------------------------===//
+// CHECK-64: %struct.struct_int8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %struct.struct_int16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %struct.struct_int32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %struct.struct_int64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %struct.struct_uint8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %struct.struct_uint16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %struct.struct_uint32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %struct.struct_uint64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %struct.struct_float32m1 = type { <2 x float> }
+// CHECK-64-NEXT: %struct.struct_float64m1 = type { <1 x double> }
+
+// CHECK-128: %struct.struct_int8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %struct.struct_int16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %struct.struct_int32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %struct.struct_int64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %struct.struct_uint8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %struct.struct_uint16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %struct.struct_uint32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %struct.struct_uint64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %struct.struct_float32m1 = type { <4 x float> }
+// CHECK-128-NEXT: %struct.struct_float64m1 = type { <2 x double> }
+
+// CHECK-256: %struct.struct_int8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %struct.struct_int16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %struct.struct_int32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %struct.struct_int64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %struct.struct_uint8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %struct.struct_uint16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %struct.struct_uint32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %struct.struct_uint64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %struct.struct_float32m1 = type { <8 x float> }
+// CHECK-256-NEXT: %struct.struct_float64m1 = type { <4 x double> }
+
+// CHECK-512: %struct.struct_int8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %struct.struct_int16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %struct.struct_int32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %struct.struct_int64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %struct.struct_uint8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %struct.struct_uint16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %struct.struct_uint32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %struct.struct_uint64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %struct.struct_float32m1 = type { <16 x float> }
+// CHECK-512-NEXT: %struct.struct_float64m1 = type { <8 x double> }
+
+// CHECK-1024: %struct.struct_int8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %struct.struct_int16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %struct.struct_int32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %struct.struct_int64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %struct.struct_uint8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %struct.struct_uint16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %struct.struct_uint32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %struct.struct_uint64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %struct.struct_float32m1 = type { <32 x float> }
+// CHECK-1024-NEXT: %struct.struct_float64m1 = type { <16 x double> }
+
+// CHECK-64: %union.union_int8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %union.union_int16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %union.union_int32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %union.union_int64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %union.union_uint8m1 = type { <8 x i8> }
+// CHECK-64-NEXT: %union.union_uint16m1 = type { <4 x i16> }
+// CHECK-64-NEXT: %union.union_uint32m1 = type { <2 x i32> }
+// CHECK-64-NEXT: %union.union_uint64m1 = type { <1 x i64> }
+// CHECK-64-NEXT: %union.union_float32m1 = type { <2 x float> }
+// CHECK-64-NEXT: %union.union_float64m1 = type { <1 x double> }
+
+// CHECK-128: %union.union_int8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %union.union_int16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %union.union_int32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %union.union_int64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %union.union_uint8m1 = type { <16 x i8> }
+// CHECK-128-NEXT: %union.union_uint16m1 = type { <8 x i16> }
+// CHECK-128-NEXT: %union.union_uint32m1 = type { <4 x i32> }
+// CHECK-128-NEXT: %union.union_uint64m1 = type { <2 x i64> }
+// CHECK-128-NEXT: %union.union_float32m1 = type { <4 x float> }
+// CHECK-128-NEXT: %union.union_float64m1 = type { <2 x double> }
+
+// CHECK-256: %union.union_int8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %union.union_int16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %union.union_int32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %union.union_int64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %union.union_uint8m1 = type { <32 x i8> }
+// CHECK-256-NEXT: %union.union_uint16m1 = type { <16 x i16> }
+// CHECK-256-NEXT: %union.union_uint32m1 = type { <8 x i32> }
+// CHECK-256-NEXT: %union.union_uint64m1 = type { <4 x i64> }
+// CHECK-256-NEXT: %union.union_float32m1 = type { <8 x float> }
+// CHECK-256-NEXT: %union.union_float64m1 = type { <4 x double> }
+
+// CHECK-512: %union.union_int8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %union.union_int16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %union.union_int32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %union.union_int64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %union.union_uint8m1 = type { <64 x i8> }
+// CHECK-512-NEXT: %union.union_uint16m1 = type { <32 x i16> }
+// CHECK-512-NEXT: %union.union_uint32m1 = type { <16 x i32> }
+// CHECK-512-NEXT: %union.union_uint64m1 = type { <8 x i64> }
+// CHECK-512-NEXT: %union.union_float32m1 = type { <16 x float> }
+// CHECK-512-NEXT: %union.union_float64m1 = type { <8 x double> }
+
+// CHECK-1024: %union.union_int8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %union.union_int16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %union.union_int32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %union.union_int64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %union.union_uint8m1 = type { <128 x i8> }
+// CHECK-1024-NEXT: %union.union_uint16m1 = type { <64 x i16> }
+// CHECK-1024-NEXT: %union.union_uint32m1 = type { <32 x i32> }
+// CHECK-1024-NEXT: %union.union_uint64m1 = type { <16 x i64> }
+// CHECK-1024-NEXT: %union.union_float32m1 = type { <32 x float> }
+// CHECK-1024-NEXT: %union.union_float64m1 = type { <16 x double> }
+
+//===----------------------------------------------------------------------===//
+// Global variables
+//===----------------------------------------------------------------------===//
+// CHECK-64: @global_i8 ={{.*}} global <8 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i16 ={{.*}} global <4 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i32 ={{.*}} global <2 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_i64 ={{.*}} global <1 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u8 ={{.*}} global <8 x i8> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u16 ={{.*}} global <4 x i16> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u32 ={{.*}} global <2 x i32> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_u64 ={{.*}} global <1 x i64> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f32 ={{.*}} global <2 x float> zeroinitializer, align 8
+// CHECK-64-NEXT: @global_f64 ={{.*}} global <1 x double> zeroinitializer, align 8
+
+// CHECK-128: @global_i8 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i16 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i32 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_i64 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u8 ={{.*}} global <16 x i8> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u16 ={{.*}} global <8 x i16> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u32 ={{.*}} global <4 x i32> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_u64 ={{.*}} global <2 x i64> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f32 ={{.*}} global <4 x float> zeroinitializer, align 8
+// CHECK-128-NEXT: @global_f64 ={{.*}} global <2 x double> zeroinitializer, align 8
+
+// CHECK-256: @global_i8 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i16 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i32 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_i64 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u8 ={{.*}} global <32 x i8> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u16 ={{.*}} global <16 x i16> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u32 ={{.*}} global <8 x i32> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_u64 ={{.*}} global <4 x i64> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f32 ={{.*}} global <8 x float> zeroinitializer, align 8
+// CHECK-256-NEXT: @global_f64 ={{.*}} global <4 x double> zeroinitializer, align 8
+
+// CHECK-512: @global_i8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i16 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_i64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u8 ={{.*}} global <64 x i8> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u16 ={{.*}} global <32 x i16> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_u64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f32 ={{.*}} global <16 x float> zeroinitializer, align 8
+// CHECK-512-NEXT: @global_f64 ={{.*}} global <8 x double> zeroinitializer, align 8
+
+// CHECK-1024: @global_i8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i16 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i32 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_i64 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u8 ={{.*}} global <128 x i8> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u16 ={{.*}} global <64 x i16> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u32 ={{.*}} global <32 x i32> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_u64 ={{.*}} global <16 x i64> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f32 ={{.*}} global <32 x float> zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_f64 ={{.*}} global <16 x double> zeroinitializer, align 8
+
+//===----------------------------------------------------------------------===//
+// Global arrays
+//===----------------------------------------------------------------------===//
+// CHECK-64: @global_arr_i8 ={{.*}} global [3 x <8 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i16 ={{.*}} global [3 x <4 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i32 ={{.*}} global [3 x <2 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_i64 ={{.*}} global [3 x <1 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u8 ={{.*}} global [3 x <8 x i8>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u16 ={{.*}} global [3 x <4 x i16>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u32 ={{.*}} global [3 x <2 x i32>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_u64 ={{.*}} global [3 x <1 x i64>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f32 ={{.*}} global [3 x <2 x float>] zeroinitializer, align 8
+// CHECK-64-NEXT: @global_arr_f64 ={{.*}} global [3 x <1 x double>] zeroinitializer, align 8
+
+// CHECK-128: @global_arr_i8 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i16 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i32 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_i64 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u8 ={{.*}} global [3 x <16 x i8>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u16 ={{.*}} global [3 x <8 x i16>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u32 ={{.*}} global [3 x <4 x i32>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_u64 ={{.*}} global [3 x <2 x i64>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f32 ={{.*}} global [3 x <4 x float>] zeroinitializer, align 8
+// CHECK-128-NEXT: @global_arr_f64 ={{.*}} global [3 x <2 x double>] zeroinitializer, align 8
+
+// CHECK-256: @global_arr_i8 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i16 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i32 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_i64 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u8 ={{.*}} global [3 x <32 x i8>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u16 ={{.*}} global [3 x <16 x i16>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u32 ={{.*}} global [3 x <8 x i32>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_u64 ={{.*}} global [3 x <4 x i64>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f32 ={{.*}} global [3 x <8 x float>] zeroinitializer, align 8
+// CHECK-256-NEXT: @global_arr_f64 ={{.*}} global [3 x <4 x double>] zeroinitializer, align 8
+
+// CHECK-512: @global_arr_i8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i16 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i32 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_i64 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u8 ={{.*}} global [3 x <64 x i8>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u16 ={{.*}} global [3 x <32 x i16>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u32 ={{.*}} global [3 x <16 x i32>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_u64 ={{.*}} global [3 x <8 x i64>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f32 ={{.*}} global [3 x <16 x float>] zeroinitializer, align 8
+// CHECK-512-NEXT: @global_arr_f64 ={{.*}} global [3 x <8 x double>] zeroinitializer, align 8
+
+// CHECK-1024: @global_arr_i8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i16 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i32 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_i64 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u8 ={{.*}} global [3 x <128 x i8>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u16 ={{.*}} global [3 x <64 x i16>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u32 ={{.*}} global [3 x <32 x i32>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_u64 ={{.*}} global [3 x <16 x i64>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f32 ={{.*}} global [3 x <32 x float>] zeroinitializer, align 8
+// CHECK-1024-NEXT: @global_arr_f64 ={{.*}} global [3 x <16 x double>] zeroinitializer, align 8
+
+//===----------------------------------------------------------------------===//
+// Local variables
+//===----------------------------------------------------------------------===//
+// CHECK-64: %local_i8 = alloca <8 x i8>, align 8
+// CHECK-64-NEXT: %local_i16 = alloca <4 x i16>, align 8
+// CHECK-64-NEXT: %local_i32 = alloca <2 x i32>, align 8
+// CHECK-64-NEXT: %local_i64 = alloca <1 x i64>, align 8
+// CHECK-64-NEXT: %local_u8 = alloca <8 x i8>, align 8
+// CHECK-64-NEXT: %local_u16 = alloca <4 x i16>, align 8
+// CHECK-64-NEXT: %local_u32 = alloca <2 x i32>, align 8
+// CHECK-64-NEXT: %local_u64 = alloca <1 x i64>, align 8
+// CHECK-64-NEXT: %local_f32 = alloca <2 x float>, align 8
+// CHECK-64-NEXT: %local_f64 = alloca <1 x double>, align 8
+
+// CHECK-128: %local_i8 = alloca <16 x i8>, align 8
+// CHECK-128-NEXT: %local_i16 = alloca <8 x i16>, align 8
+// CHECK-128-NEXT: %local_i32 = alloca <4 x i32>, align 8
+// CHECK-128-NEXT: %local_i64 = alloca <2 x i64>, align 8
+// CHECK-128-NEXT: %local_u8 = alloca <16 x i8>, align 8
+// CHECK-128-NEXT: %local_u16 = alloca <8 x i16>, align 8
+// CHECK-128-NEXT: %local_u32 = alloca <4 x i32>, align 8
+// CHECK-128-NEXT: %local_u64 = alloca <2 x i64>, align 8
+// CHECK-128-NEXT: %local_f32 = alloca <4 x float>, align 8
+// CHECK-128-NEXT: %local_f64 = alloca <2 x double>, align 8
+
+// CHECK-256: %local_i8 = alloca <32 x i8>, align 8
+// CHECK-256-NEXT: %local_i16 = alloca <16 x i16>, align 8
+// CHECK-256-NEXT: %local_i32 = alloca <8 x i32>, align 8
+// CHECK-256-NEXT: %local_i64 = alloca <4 x i64>, align 8
+// CHECK-256-NEXT: %local_u8 = alloca <32 x i8>, align 8
+// CHECK-256-NEXT: %local_u16 = alloca <16 x i16>, align 8
+// CHECK-256-NEXT: %local_u32 = alloca <8 x i32>, align 8
+// CHECK-256-NEXT: %local_u64 = alloca <4 x i64>, align 8
+// CHECK-256-NEXT: %local_f32 = alloca <8 x float>, align 8
+// CHECK-256-NEXT: %local_f64 = alloca <4 x double>, align 8
+
+// CHECK-512: %local_i8 = alloca <64 x i8>, align 8
+// CHECK-512-NEXT: %local_i16 = alloca <32 x i16>, align 8
+// CHECK-512-NEXT: %local_i32 = alloca <16 x i32>, align 8
+// CHECK-512-NEXT: %local_i64 = alloca <8 x i64>, align 8
+// CHECK-512-NEXT: %local_u8 = alloca <64 x i8>, align 8
+// CHECK-512-NEXT: %local_u16 = alloca <32 x i16>, align 8
+// CHECK-512-NEXT: %local_u32 = alloca <16 x i32>, align 8
+// CHECK-512-NEXT: %local_u64 = alloca <8 x i64>, align 8
+// CHECK-512-NEXT: %local_f32 = alloca <16 x float>, align 8
+// CHECK-512-NEXT: %local_f64 = alloca <8 x double>, align 8
+
+// CHECK-1024: %local_i8 = alloca <128 x i8>, align 8
+// CHECK-1024-NEXT: %local_i16 = alloca <64 x i16>, align 8
+// CHECK-1024-NEXT: %local_i32 = alloca <32 x i32>, align 8
+// CHECK-1024-NEXT: %local_i64 = alloca <16 x i64>, align 8
+// CHECK-1024-NEXT: %local_u8 = alloca <128 x i8>, align 8
+// CHECK-1024-NEXT: %local_u16 = alloca <64 x i16>, align 8
+// CHECK-1024-NEXT: %local_u32 = alloca <32 x i32>, align 8
+// CHECK-1024-NEXT: %local_u64 = alloca <16 x i64>, align 8
+// CHECK-1024-NEXT: %local_f32 = alloca <32 x float>, align 8
+// CHECK-1024-NEXT: %local_f64 = alloca <16 x double>, align 8
+
+//===----------------------------------------------------------------------===//
+// Local arrays
+//===----------------------------------------------------------------------===//
+// CHECK-64: %local_arr_i8 = alloca [3 x <8 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_i16 = alloca [3 x <4 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_i32 = alloca [3 x <2 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_i64 = alloca [3 x <1 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_u8 = alloca [3 x <8 x i8>], align 8
+// CHECK-64-NEXT: %local_arr_u16 = alloca [3 x <4 x i16>], align 8
+// CHECK-64-NEXT: %local_arr_u32 = alloca [3 x <2 x i32>], align 8
+// CHECK-64-NEXT: %local_arr_u64 = alloca [3 x <1 x i64>], align 8
+// CHECK-64-NEXT: %local_arr_f32 = alloca [3 x <2 x float>], align 8
+// CHECK-64-NEXT: %local_arr_f64 = alloca [3 x <1 x double>], align 8
+
+// CHECK-128: %local_arr_i8 = alloca [3 x <16 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_i16 = alloca [3 x <8 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_i32 = alloca [3 x <4 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_i64 = alloca [3 x <2 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_u8 = alloca [3 x <16 x i8>], align 8
+// CHECK-128-NEXT: %local_arr_u16 = alloca [3 x <8 x i16>], align 8
+// CHECK-128-NEXT: %local_arr_u32 = alloca [3 x <4 x i32>], align 8
+// CHECK-128-NEXT: %local_arr_u64 = alloca [3 x <2 x i64>], align 8
+// CHECK-128-NEXT: %local_arr_f32 = alloca [3 x <4 x float>], align 8
+// CHECK-128-NEXT: %local_arr_f64 = alloca [3 x <2 x double>], align 8
+
+// CHECK-256: %local_arr_i8 = alloca [3 x <32 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_i16 = alloca [3 x <16 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_i32 = alloca [3 x <8 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_i64 = alloca [3 x <4 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_u8 = alloca [3 x <32 x i8>], align 8
+// CHECK-256-NEXT: %local_arr_u16 = alloca [3 x <16 x i16>], align 8
+// CHECK-256-NEXT: %local_arr_u32 = alloca [3 x <8 x i32>], align 8
+// CHECK-256-NEXT: %local_arr_u64 = alloca [3 x <4 x i64>], align 8
+// CHECK-256-NEXT: %local_arr_f32 = alloca [3 x <8 x float>], align 8
+// CHECK-256-NEXT: %local_arr_f64 = alloca [3 x <4 x double>], align 8
+
+// CHECK-512: %local_arr_i8 = alloca [3 x <64 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_i16 = alloca [3 x <32 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_i32 = alloca [3 x <16 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_i64 = alloca [3 x <8 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_u8 = alloca [3 x <64 x i8>], align 8
+// CHECK-512-NEXT: %local_arr_u16 = alloca [3 x <32 x i16>], align 8
+// CHECK-512-NEXT: %local_arr_u32 = alloca [3 x <16 x i32>], align 8
+// CHECK-512-NEXT: %local_arr_u64 = alloca [3 x <8 x i64>], align 8
+// CHECK-512-NEXT: %local_arr_f32 = alloca [3 x <16 x float>], align 8
+// CHECK-512-NEXT: %local_arr_f64 = alloca [3 x <8 x double>], align 8
+
+// CHECK-1024: %local_arr_i8 = alloca [3 x <128 x i8>], align 8
+// CHECK-1024-NEXT: %local_arr_i16 = alloca [3 x <64 x i16>], align 8
+// CHECK-1024-NEXT: %local_arr_i32 = alloca [3 x <32 x i32>], align 8
+// CHECK-1024-NEXT: %local_arr_i64 = alloca [3 x <16 x i64>], align 8
+// CHECK-1024-NEXT: %local_arr_u8 = alloca [3 x <128 x i8>], align 8
+// CHECK-1024-NEXT: %local_arr_u16 = alloca [3 x <64 x i16>], align 8
+// CHECK-1024-NEXT: %local_arr_u32 = alloca [3 x <32 x i32>], align 8
+// CHECK-1024-NEXT: %local_arr_u64 = alloca [3 x <16 x i64>], align 8
+// CHECK-1024-NEXT: %local_arr_f32 = alloca [3 x <32 x float>], align 8
+// CHECK-1024-NEXT: %local_arr_f64 = alloca [3 x <16 x double>], align 8
+
+//===----------------------------------------------------------------------===//
+// ILP32 ABI
+//===----------------------------------------------------------------------===//
+// CHECK-ILP32: @global_i32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-ILP32: @global_i64 ={{.*}} global <8 x i64> zeroinitializer, align 8
+// CHECK-ILP32: @global_u32 ={{.*}} global <16 x i32> zeroinitializer, align 8
+// CHECK-ILP32: @global_u64 ={{.*}} global <8 x i64> zeroinitializer, align 8
diff --git a/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c b/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-arith-ops.c
@@ -0,0 +1,1821 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <riscv_vector.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// ADDITION
+
+// CHECK-LABEL: @add_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_inplace_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_inplace_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a += b;
+}
+
+// CHECK-LABEL: @add_scalar_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t add_scalar_i8(fixed_int8m1_t a, int8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t add_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t add_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t add_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t add_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t add_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t add_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = add <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t add_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t add_scalar_f32(fixed_float32m1_t a, float b) {
+  return a + b;
+}
+
+// CHECK-LABEL: @add_scalar_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[ADD:%.*]] = fadd <4 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[ADD]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t add_scalar_f64(fixed_float64m1_t a, double b) {
+  return a + b;
+}
+
+// SUBTRACTION
+
+// CHECK-LABEL: @sub_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_inplace_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t sub_scalar_i8(fixed_int8m1_t a, int8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t sub_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t sub_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t sub_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t sub_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t sub_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t sub_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = sub <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t sub_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x float> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x float> @llvm.vector.insert.nxv2f32.v8f32(<vscale x 2 x float> undef, <8 x float> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x float> [[CASTSCALABLESVE]]
+//
+fixed_float32m1_t sub_scalar_f32(fixed_float32m1_t a, float b) {
+  return a - b;
+}
+
+// CHECK-LABEL: @sub_scalar_f64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SUB:%.*]] = fsub <4 x double> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x double> @llvm.vector.insert.nxv1f64.v4f64(<vscale x 1 x double> undef, <4 x double> [[SUB]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x double> [[CASTSCALABLESVE]]
+//
+fixed_float64m1_t sub_scalar_f64(fixed_float64m1_t a, double b) {
+  return a - b;
+}
+
+// MULTIPLICATION
+
+// CHECK-LABEL: @mul_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[MUL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t mul_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a * b;
+}
+
+// CHECK-LABEL: @mul_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[MUL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t mul_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a * b;
+}
+
+//
CHECK-LABEL: @mul_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = 
call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t mul_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t mul_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: 
[[MUL:%.*]] = mul <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_inplace_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> 
[[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t mul_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t mul_scalar_i16(fixed_int16m1_t a, int16_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t mul_scalar_i32(fixed_int32m1_t a, int32_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t mul_scalar_i64(fixed_int64m1_t a, int64_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t mul_scalar_u8(fixed_uint8m1_t a, uint8_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> 
zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t mul_scalar_u16(fixed_uint16m1_t a, uint16_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t mul_scalar_u32(fixed_uint32m1_t a, uint32_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = mul <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t mul_scalar_u64(fixed_uint64m1_t a, uint64_t b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x float> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t mul_scalar_f32(fixed_float32m1_t a, float b) { + return a * b; +} + +// CHECK-LABEL: @mul_scalar_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x double> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[MUL]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t mul_scalar_f64(fixed_float64m1_t a, double b) { + return a * b; +} + +// DIVISION + +// CHECK-LABEL: @div_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call 
@llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + 
return a / b; +} + +// CHECK-LABEL: @div_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = 
call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_inplace_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_inplace_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( 
[[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_inplace_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t div_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t div_scalar_i16(fixed_int16m1_t a, int16_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t div_scalar_i32(fixed_int32m1_t a, int32_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = sdiv <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t div_scalar_i64(fixed_int64m1_t a, int64_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = 
shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t div_scalar_u8(fixed_uint8m1_t a, uint8_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <16 x i16> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t div_scalar_u16(fixed_uint16m1_t a, uint16_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <8 x i32> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t div_scalar_u32(fixed_uint32m1_t a, uint32_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = udiv <4 x i64> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t div_scalar_u64(fixed_uint64m1_t a, uint64_t b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x float> [[SPLAT_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2f32.v8f32( undef, <8 x float> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float32m1_t div_scalar_f32(fixed_float32m1_t a, float b) { + return a / b; +} + +// CHECK-LABEL: @div_scalar_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 
x double> [[SPLAT_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer +// CHECK-NEXT: [[DIV:%.*]] = fdiv <4 x double> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1f64.v4f64( undef, <4 x double> [[DIV]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_float64m1_t div_scalar_f64(fixed_float64m1_t a, double b) { + return a / b; +} + +// REMAINDER + +// CHECK-LABEL: @rem_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t rem_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t rem_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t rem_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t rem_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: 
[[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t rem_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t rem_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t rem_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_inplace_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t rem_inplace_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t rem_inplace_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = srem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret 
[[CASTSCALABLESVE]] +// +fixed_int64m1_t rem_inplace_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint8m1_t rem_inplace_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint16m1_t rem_inplace_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t rem_inplace_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_inplace_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[REM:%.*]] = urem <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t rem_inplace_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_scalar_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer +// CHECK-NEXT: [[REM:%.*]] = srem <32 x i8> [[A]], [[SPLAT_SPLAT]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[REM]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t rem_scalar_i8(fixed_int8m1_t a, int8_t b) { + return a % b; +} + +// CHECK-LABEL: @rem_scalar_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0 +// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer +// CHECK-NEXT: [[REM:%.*]] = 
srem <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rem_scalar_i16(fixed_int16m1_t a, int16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rem_scalar_i32(fixed_int32m1_t a, int32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = srem <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rem_scalar_i64(fixed_int64m1_t a, int64_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <32 x i8> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rem_scalar_u8(fixed_uint8m1_t a, uint8_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <16 x i16> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rem_scalar_u16(fixed_uint16m1_t a, uint16_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rem_scalar_u32(fixed_uint32m1_t a, uint32_t b) {
+  return a % b;
+}
+
+// CHECK-LABEL: @rem_scalar_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT:    [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT:    [[REM:%.*]] = urem <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[REM]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rem_scalar_u64(fixed_uint64m1_t a, uint64_t b) {
+  return a % b;
+}
diff --git a/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c b/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-bitwise-ops.c
@@ -0,0 +1,419 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// AND
+
+// CHECK-LABEL: @and_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t and_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t and_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t and_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t and_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t and_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t and_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t and_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+  return a & b;
+}
+
+// CHECK-LABEL: @and_u64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[AND:%.*]] = and <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[AND]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t and_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+  return a & b;
+}
+
+// OR
+
+// CHECK-LABEL: @or_i8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t or_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t or_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <8 x i32> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t or_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_i64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <4 x i64> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t or_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <32 x i8> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t or_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+  return a | b;
+}
+
+// CHECK-LABEL: @or_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT:    [[OR:%.*]] = or <16 x i16> [[A]], [[B]]
+// CHECK-NEXT:    [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[OR]], i64 0)
+// CHECK-NEXT:    ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t or_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+  return a | b;
+}
+
+// 
CHECK-LABEL: @or_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[OR:%.*]] = or <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[OR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint32m1_t or_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a | b; +} + +// CHECK-LABEL: @or_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[OR:%.*]] = or <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[OR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_uint64m1_t or_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a | b; +} + +// XOR + +// CHECK-LABEL: @xor_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t xor_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t xor_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t xor_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[XOR]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t xor_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x 
i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_uint8m1_t xor_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_uint16m1_t xor_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_uint32m1_t xor_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a ^ b; +} + +// CHECK-LABEL: @xor_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[XOR:%.*]] = xor <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[XOR]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_uint64m1_t xor_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a ^ b; +} + +// NOT + +// CHECK-LABEL: @not_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_int8m1_t not_i8(fixed_int8m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_int16m1_t not_i16(fixed_int16m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t not_i32(fixed_int32m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t not_i64(fixed_int64m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <32 x i8> [[A]], <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]] +// +fixed_uint8m1_t not_u8(fixed_uint8m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <16 x i16> [[A]], <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_uint16m1_t not_u16(fixed_uint16m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <8 x i32> [[A]], <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_uint32m1_t not_u32(fixed_uint32m1_t a) { + return ~a; +} + +// CHECK-LABEL: @not_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[NOT:%.*]] = xor <4 x i64> [[A]], <i64 -1, i64 -1, i64 -1, i64 -1> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[NOT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_uint64m1_t not_u64(fixed_uint64m1_t a) { + return ~a; +} diff --git a/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c b/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/riscv-rvv-vls-compare-ops.c @@ -0,0 +1,827 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \ +// RUN: -target-feature +f -target-feature +d -disable-O0-optnone \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \ +// RUN: opt -S -passes=sroa | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include <stdint.h> + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint64m1_t
fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +// EQ + +// CHECK-LABEL: @eq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t eq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t eq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t eq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// 
CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t eq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp eq <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t eq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a == b; +} + +// CHECK-LABEL: @eq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t eq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a == b; +} + +// NEQ + +// CHECK-LABEL: @neq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: 
ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t neq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t neq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t neq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t neq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// 
CHECK-NEXT: [[CMP:%.*]] = icmp ne <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ne <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp une <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t neq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a != b; +} + +// CHECK-LABEL: @neq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp une <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t neq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a != b; +} + +// LT + +// CHECK-LABEL: @lt_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t lt_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t lt_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return 
a < b; +} + +// CHECK-LABEL: @lt_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp slt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t lt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t lt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ult <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> 
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t lt_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a < b; +} + +// CHECK-LABEL: @lt_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t lt_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a < b; +} + +// LEQ + +// CHECK-LABEL: @leq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t leq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t leq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> 
@llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sle <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t leq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t leq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ule <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call 
@llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t leq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a <= b; +} + +// CHECK-LABEL: @leq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t leq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a <= b; +} + +// GT + +// CHECK-LABEL: @gt_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t gt_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t gt_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t gt_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: 
[[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t gt_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t gt_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t gt_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t gt_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a > b; +} + +// CHECK-LABEL: @gt_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] 
+// +fixed_int64m1_t gt_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a > b; +} + +// GEQ + +// CHECK-LABEL: @geq_i8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t geq_i8(fixed_int8m1_t a, fixed_int8m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv4i16.v16i16( undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int16m1_t geq_i16(fixed_int16m1_t a, fixed_int16m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv2i32.v8i32( undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_i32(fixed_int32m1_t a, fixed_int32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_i64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp sge <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv1i64.v4i64( undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_i64(fixed_int64m1_t a, fixed_int64m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <32 x i8> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <32 x i1> [[CMP]] to <32 x i8> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call @llvm.vector.insert.nxv8i8.v32i8( undef, <32 x i8> [[SEXT]], i64 0) +// CHECK-NEXT: ret [[CASTSCALABLESVE]] +// +fixed_int8m1_t geq_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16( [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] 
= icmp uge <16 x i16> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <16 x i1> [[CMP]] to <16 x i16> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]] +// +fixed_int16m1_t geq_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <8 x i32> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = icmp uge <4 x i64> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge <8 x float> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <8 x i1> [[CMP]] to <8 x i32> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]] +// +fixed_int32m1_t geq_f32(fixed_float32m1_t a, fixed_float32m1_t b) { + return a >= b; +} + +// CHECK-LABEL: @geq_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[B:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[B_COERCE:%.*]], i64 0) +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge <4 x double> [[A]], [[B]] +// CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i64> +// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SEXT]], i64 0) +// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]] +// +fixed_int64m1_t geq_f64(fixed_float64m1_t a, fixed_float64m1_t b) { + return a >= b; +} diff --git a/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c b/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/riscv-rvv-vls-shift-ops.c @@ -0,0 +1,659 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \ +// RUN: -target-feature +f -target-feature +d -disable-O0-optnone \ +// RUN: -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \ +// RUN: opt -S -passes=sroa | FileCheck %s + +// REQUIRES: riscv-registered-target + +#include <stdint.h> + +#define N __RISCV_RVV_VLEN_BITS + +typedef
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// CHECK-LABEL: @lshift_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8(fixed_int8m1_t a, fixed_int8m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8(fixed_uint8m1_t a, fixed_uint8m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16(fixed_int16m1_t a, fixed_int16m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16(fixed_uint16m1_t a, fixed_uint16m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32(fixed_int32m1_t a, fixed_int32m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32(fixed_uint32m1_t a, fixed_uint32m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_i64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64(fixed_int64m1_t a, fixed_int64m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @rshift_u64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[B:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[B_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[A]], [[B]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64(fixed_uint64m1_t a, fixed_uint64m1_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @lshift_i8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t lshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8_rsplat(fixed_int8m1_t a, int8_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_int8m1_t rshift_i8_lsplat(fixed_int8m1_t a, int8_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t lshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u8_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i8 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i32> [[SPLAT_SPLATINSERT]], <32 x i32> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <32 x i32> [[SPLAT_SPLAT]] to <32 x i8>
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8_rsplat(fixed_uint8m1_t a, uint8_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u8_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <32 x i8> poison, i8 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <32 x i8> [[SPLAT_SPLATINSERT]], <32 x i8> poison, <32 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <32 x i8> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 8 x i8> @llvm.vector.insert.nxv8i8.v32i8(<vscale x 8 x i8> undef, <32 x i8> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 8 x i8> [[CASTSCALABLESVE]]
+//
+fixed_uint8m1_t rshift_u8_lsplat(fixed_uint8m1_t a, uint8_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t lshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = sext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16_rsplat(fixed_int16m1_t a, int16_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_int16m1_t rshift_i16_lsplat(fixed_int16m1_t a, int16_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t lshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u16_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[B:%.*]] to i32
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[CONV]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i32> [[SPLAT_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM:%.*]] = trunc <16 x i32> [[SPLAT_SPLAT]] to <16 x i16>
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[A]], [[SH_PROM]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16_rsplat(fixed_uint16m1_t a, uint16_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u16_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <16 x i16> poison, i16 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <16 x i16> [[SPLAT_SPLATINSERT]], <16 x i16> poison, <16 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <16 x i16> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 4 x i16> @llvm.vector.insert.nxv4i16.v16i16(<vscale x 4 x i16> undef, <16 x i16> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 4 x i16> [[CASTSCALABLESVE]]
+//
+fixed_uint16m1_t rshift_u16_lsplat(fixed_uint16m1_t a, uint16_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t lshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32_rsplat(fixed_int32m1_t a, int32_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_int32m1_t rshift_i32_lsplat(fixed_int32m1_t a, int32_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t lshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u32_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32_rsplat(fixed_uint32m1_t a, uint32_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u32_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <8 x i32> poison, i32 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <8 x i32> [[SPLAT_SPLATINSERT]], <8 x i32> poison, <8 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <8 x i32> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v8i32(<vscale x 2 x i32> undef, <8 x i32> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 2 x i32> [[CASTSCALABLESVE]]
+//
+fixed_uint32m1_t rshift_u32_lsplat(fixed_uint32m1_t a, uint32_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_i64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_i64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t lshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_i64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64_rsplat(fixed_int64m1_t a, int64_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_i64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = ashr <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_int64m1_t rshift_i64_lsplat(fixed_int64m1_t a, int64_t b) {
+ return b >> a;
+}
+
+// CHECK-LABEL: @lshift_u64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
+ return a << b;
+}
+
+// CHECK-LABEL: @lshift_u64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHL:%.*]] = shl <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHL]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t lshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
+ return b << a;
+}
+
+// CHECK-LABEL: @rshift_u64_rsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[A]], [[SPLAT_SPLAT]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64_rsplat(fixed_uint64m1_t a, uint64_t b) {
+ return a >> b;
+}
+
+// CHECK-LABEL: @rshift_u64_lsplat(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[SPLAT_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[B:%.*]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT:%.*]] = shufflevector <4 x i64> [[SPLAT_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+// CHECK-NEXT: [[SHR:%.*]] = lshr <4 x i64> [[SPLAT_SPLAT]], [[A]]
+// CHECK-NEXT: [[CASTSCALABLESVE:%.*]] = call <vscale x 1 x i64> @llvm.vector.insert.nxv1i64.v4i64(<vscale x 1 x i64> undef, <4 x i64> [[SHR]], i64 0)
+// CHECK-NEXT: ret <vscale x 1 x i64> [[CASTSCALABLESVE]]
+//
+fixed_uint64m1_t rshift_u64_lsplat(fixed_uint64m1_t a, uint64_t b) {
+ return b >> a;
+}
diff --git a/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c b/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/riscv-rvv-vls-subscript-ops.c
@@ -0,0 +1,136 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64d \
+// RUN:   -target-feature +f -target-feature +d -disable-O0-optnone \
+// RUN:   -mvscale-min=4 -mvscale-max=4 -emit-llvm -o - %s | \
+// RUN:   opt -S -passes=sroa | FileCheck %s
+
+// REQUIRES: riscv-registered-target
+
+#include <stddef.h>
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+// CHECK-LABEL: @subscript_int8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <32 x i8> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i8 [[VECEXT]]
+//
+int8_t subscript_int8(fixed_int8m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <32 x i8> @llvm.vector.extract.v32i8.nxv8i8(<vscale x 8 x i8> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <32 x i8> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i8 [[VECEXT]]
+//
+uint8_t subscript_uint8(fixed_uint8m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i16 [[VECEXT]]
+//
+int16_t subscript_int16(fixed_int16m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <16 x i16> @llvm.vector.extract.v16i16.nxv4i16(<vscale x 4 x i16> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <16 x i16> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i16 [[VECEXT]]
+//
+uint16_t subscript_uint16(fixed_uint16m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i32 [[VECEXT]]
+//
+int32_t subscript_int32(fixed_int32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x i32> @llvm.vector.extract.v8i32.nxv2i32(<vscale x 2 x i32> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x i32> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i32 [[VECEXT]]
+//
+uint32_t subscript_uint32(fixed_uint32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_int64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i64 [[VECEXT]]
+//
+int64_t subscript_int64(fixed_int64m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_uint64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x i64> @llvm.vector.extract.v4i64.nxv1i64(<vscale x 1 x i64> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x i64> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret i64 [[VECEXT]]
+//
+uint64_t subscript_uint64(fixed_uint64m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_float32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <8 x float> @llvm.vector.extract.v8f32.nxv2f32(<vscale x 2 x float> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret float [[VECEXT]]
+//
+float subscript_float32(fixed_float32m1_t a, size_t b) {
+ return a[b];
+}
+
+// CHECK-LABEL: @subscript_float64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[A:%.*]] = call <4 x double> @llvm.vector.extract.v4f64.nxv1f64(<vscale x 1 x double> [[A_COERCE:%.*]], i64 0)
+// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x double> [[A]], i64 [[B:%.*]]
+// CHECK-NEXT: ret double [[VECEXT]]
+//
+double subscript_float64(fixed_float64m1_t a, size_t b) {
+ return a[b];
+}
diff --git a/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp b/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/riscv-mangle-rvv-fixed-vectors.cpp
@@ -0,0 +1,118 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-256
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-512
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-1024
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+template <typename T> struct S {};
+
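+// Fixed-length RVV types mangle as if they were instances of a template
+// __RVV_VLS<T, VLEN>; e.g. S<fixed_int8m1_t> at VLEN=64 mangles as
+// S<__RVV_VLS<__rvv_int8m1_t, 64u>>, i.e. "9__RVV_VLSIu14__rvv_int8m1_tLj64EE".
+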
+// CHECK-64: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj64EEE
+// CHECK-128: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj128EEE
+// CHECK-256: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj256EEE
+// CHECK-512: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj512EEE
+// CHECK-1024: _Z2f11SI9__RVV_VLSIu14__rvv_int8m1_tLj1024EEE
+void f1(S<fixed_int8m1_t>) {}
+
+// CHECK-64: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj64EEE
+// CHECK-128: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj128EEE
+// CHECK-256: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj256EEE
+// CHECK-512: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj512EEE
+// CHECK-1024: _Z2f21SI9__RVV_VLSIu15__rvv_int16m1_tLj1024EEE
+void f2(S<fixed_int16m1_t>) {}
+
+// CHECK-64: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj64EEE
+// CHECK-128: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj128EEE
+// CHECK-256: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj256EEE
+// CHECK-512: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj512EEE
+// CHECK-1024: _Z2f31SI9__RVV_VLSIu15__rvv_int32m1_tLj1024EEE
+void f3(S<fixed_int32m1_t>) {}
+
+// CHECK-64: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj64EEE
+// CHECK-128: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj128EEE
+// CHECK-256: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj256EEE
+// CHECK-512: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj512EEE
+// CHECK-1024: _Z2f41SI9__RVV_VLSIu15__rvv_int64m1_tLj1024EEE
+void f4(S<fixed_int64m1_t>) {}
+
+// CHECK-64: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj64EEE
+// CHECK-128: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj128EEE
+// CHECK-256: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj256EEE
+// CHECK-512: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj512EEE
+// CHECK-1024: _Z2f51SI9__RVV_VLSIu15__rvv_uint8m1_tLj1024EEE
+void f5(S<fixed_uint8m1_t>) {}
+
+// CHECK-64: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj64EEE
+// CHECK-128: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj128EEE
+// CHECK-256: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj256EEE
+// CHECK-512: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj512EEE
+// CHECK-1024: _Z2f61SI9__RVV_VLSIu16__rvv_uint16m1_tLj1024EEE
+void f6(S<fixed_uint16m1_t>) {}
+
+// CHECK-64: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj64EEE
+// CHECK-128: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj128EEE
+// CHECK-256: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj256EEE
+// CHECK-512: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj512EEE
+// CHECK-1024: _Z2f71SI9__RVV_VLSIu16__rvv_uint32m1_tLj1024EEE
+void f7(S<fixed_uint32m1_t>) {}
+
+// CHECK-64: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj64EEE
+// CHECK-128: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj128EEE
+// CHECK-256: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj256EEE
+// CHECK-512: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj512EEE
+// CHECK-1024: _Z2f81SI9__RVV_VLSIu16__rvv_uint64m1_tLj1024EEE
+void f8(S<fixed_uint64m1_t>) {}
+
+// CHECK-64: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj64EEE
+// CHECK-128: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj128EEE
+// CHECK-256: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj256EEE
+// CHECK-512: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj512EEE
+// CHECK-1024: _Z2f91SI9__RVV_VLSIu17__rvv_float32m1_tLj1024EEE
+void f9(S<fixed_float32m1_t>) {}
+
+// CHECK-64: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj64EEE
+// CHECK-128: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj128EEE
+// CHECK-256: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj256EEE
+// CHECK-512: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj512EEE
+// CHECK-1024: _Z3f101SI9__RVV_VLSIu17__rvv_float64m1_tLj1024EEE
+void f10(S<fixed_float64m1_t>) {}
diff --git a/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp b/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGenCXX/riscv-rvv-fixedtypeinfo.cpp
@@ -0,0 +1,123 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-64
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \
+// RUN:   -target-feature +f -target-feature +d \
+// RUN:   -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 \
+// RUN:   | FileCheck %s --check-prefix=CHECK-128
+// RUN: %clang_cc1 -triple
riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 \ +// RUN: | FileCheck %s --check-prefix=CHECK-256 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 \ +// RUN: | FileCheck %s --check-prefix=CHECK-512 +// RUN: %clang_cc1 -triple riscv64-none-linux-gnu %s -emit-llvm -o - \ +// RUN: -target-feature +f -target-feature +d \ +// RUN: -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 \ +// RUN: | FileCheck %s --check-prefix=CHECK-1024 + +#define N __RISCV_RVV_VLEN_BITS + +typedef __rvv_int8m1_t vint8m1_t; +typedef __rvv_uint8m1_t vuint8m1_t; +typedef __rvv_int16m1_t vint16m1_t; +typedef __rvv_uint16m1_t vuint16m1_t; +typedef __rvv_int32m1_t vint32m1_t; +typedef __rvv_uint32m1_t vuint32m1_t; +typedef __rvv_int64m1_t vint64m1_t; +typedef __rvv_uint64m1_t vuint64m1_t; +typedef __rvv_float32m1_t vfloat32m1_t; +typedef __rvv_float64m1_t vfloat64m1_t; + +typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N))); +typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N))); + +namespace std { +class type_info; +}; + +auto &fs8 = typeid(fixed_int8m1_t); +auto &fs16 = typeid(fixed_int16m1_t); +auto &fs32 = typeid(fixed_int32m1_t); +auto &fs64 = typeid(fixed_int64m1_t); + +auto &fu8 = typeid(fixed_uint8m1_t); +auto &fu16 = typeid(fixed_uint16m1_t); +auto &fu32 = typeid(fixed_uint32m1_t); +auto &fu64 = typeid(fixed_uint64m1_t); + +auto &ff32 = typeid(fixed_float32m1_t); +auto &ff64 = typeid(fixed_float64m1_t); + +// CHECK-64: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu14__rvv_int8m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu14__rvv_int8m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj256EE = {{.*}} 
@_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int16m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int16m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_int64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_int64m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu15__rvv_uint8m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu15__rvv_uint8m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint16m1_tLj1024EE = 
{{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint16m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu16__rvv_uint64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu16__rvv_uint64m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu17__rvv_float32m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float32m1_tLj1024EE + +// CHECK-64: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj64EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj64EE +// CHECK-128: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj128EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj128EE +// CHECK-256: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj256EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj256EE +// CHECK-512: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj512EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj512EE +// CHECK-1024: @_ZTI9__RVV_VLSIu17__rvv_float64m1_tLj1024EE = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTS9__RVV_VLSIu17__rvv_float64m1_tLj1024EE diff --git a/clang/test/Driver/riscv-rvv-vector-bits.c b/clang/test/Driver/riscv-rvv-vector-bits.c --- a/clang/test/Driver/riscv-rvv-vector-bits.c +++ 
b/clang/test/Driver/riscv-rvv-vector-bits.c
@@ -43,3 +43,24 @@
 // RUN:   -mrvv-vector-bits=64 2>&1 | FileCheck --check-prefix=CHECK-BAD-VALUE-ERROR %s
 
 // CHECK-BAD-VALUE-ERROR: error: unsupported argument '{{.*}}' to option '-mrvv-vector-bits='
+
+// Error if using attribute without -mrvv-vector-bits= or with -mrvv-vector-bits=scalable
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x -mrvv-vector-bits=scalable 2>&1 | FileCheck --check-prefix=CHECK-NO-FLAG-ERROR %s
+
+typedef __rvv_int32m1_t vint32m1_t;
+typedef vint32m1_t noflag __attribute__((riscv_rvv_vector_bits(256)));
+
+// CHECK-NO-FLAG-ERROR: error: 'riscv_rvv_vector_bits' is only supported when '-mrvv-vector-bits=' is specified with a value of "zvl" or a power 2 in the range [64,65536]
+
+// Error if attribute vector size != -mrvv-vector-bits
+// -----------------------------------------------------------------------------
+// RUN: not %clang -c %s -o /dev/null -target riscv64-linux-gnu \
+// RUN:   -march=rv64gc_zve64x -mrvv-vector-bits=128 2>&1 | FileCheck --check-prefix=CHECK-BAD-VECTOR-SIZE-ERROR %s
+
+typedef vint32m1_t bad_vector_size __attribute__((riscv_rvv_vector_bits(256)));
+
+// CHECK-BAD-VECTOR-SIZE-ERROR: error: invalid RVV vector size '256', must match value set by '-mrvv-vector-bits' ('128')
diff --git a/clang/test/Sema/attr-riscv-rvv-vector-bits.c b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
--- a/clang/test/Sema/attr-riscv-rvv-vector-bits.c
+++ b/clang/test/Sema/attr-riscv-rvv-vector-bits.c
@@ -1,10 +1,12 @@
-// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify %s
-
-// TODO: Support for a arm_sve_vector_bits like attribute will come in the future.
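+// The RUN lines below cover each supported power-of-two VLEN: with zve64d,
+// -mvscale-min/max=K pins vscale to K, giving VLEN = 64 * K (64 through 1024).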
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=1 -mvscale-max=1 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=2 -mvscale-max=2 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=4 -mvscale-max=4 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=8 -mvscale-max=8 %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -ffreestanding -fsyntax-only -verify -mvscale-min=16 -mvscale-max=16 %s
 
 #include <stdint.h>
 
-#define N 64
+#define N __RISCV_RVV_VLEN_BITS
 
 typedef __rvv_int8m1_t vint8m1_t;
 typedef __rvv_uint8m1_t vuint8m1_t;
@@ -17,64 +19,299 @@
 typedef __rvv_float32m1_t vfloat32m1_t;
 typedef __rvv_float64m1_t vfloat64m1_t;
 
+// Define valid fixed-width RVV types
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint16m1_t fixed_int16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint32m1_t fixed_int32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vint64m1_t fixed_int64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vuint8m1_t fixed_uint8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint16m1_t fixed_uint16m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint32m1_t fixed_uint32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vuint64m1_t fixed_uint64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
+typedef vfloat32m1_t fixed_float32m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t fixed_float64m1_t __attribute__((riscv_rvv_vector_bits(N)));
+
 // GNU vector types
-typedef int8_t gnu_int8_t __attribute__((vector_size(N / 8)));
-typedef int16_t gnu_int16_t __attribute__((vector_size(N / 8)));
-typedef int32_t gnu_int32_t __attribute__((vector_size(N / 8)));
-typedef int64_t gnu_int64_t __attribute__((vector_size(N / 8)));
+typedef int8_t gnu_int8m1_t __attribute__((vector_size(N / 8)));
+typedef int16_t gnu_int16m1_t __attribute__((vector_size(N / 8)));
+typedef int32_t gnu_int32m1_t __attribute__((vector_size(N / 8)));
+typedef int64_t gnu_int64m1_t __attribute__((vector_size(N / 8)));
+
+typedef uint8_t gnu_uint8m1_t __attribute__((vector_size(N / 8)));
+typedef uint16_t gnu_uint16m1_t __attribute__((vector_size(N / 8)));
+typedef uint32_t gnu_uint32m1_t __attribute__((vector_size(N / 8)));
+typedef uint64_t gnu_uint64m1_t __attribute__((vector_size(N / 8)));
+
+typedef float gnu_float32m1_t __attribute__((vector_size(N / 8)));
+typedef double gnu_float64m1_t __attribute__((vector_size(N / 8)));
+
+// Attribute must have a single argument
+typedef vint8m1_t no_argument __attribute__((riscv_rvv_vector_bits)); // expected-error {{'riscv_rvv_vector_bits' attribute takes one argument}}
+typedef vint8m1_t two_arguments __attribute__((riscv_rvv_vector_bits(2, 4))); // expected-error {{'riscv_rvv_vector_bits' attribute takes one argument}}
+
+// The number of RVV vector bits must be an integer constant expression
+typedef vint8m1_t non_int_size1 __attribute__((riscv_rvv_vector_bits(2.0))); // expected-error {{'riscv_rvv_vector_bits' attribute requires an integer constant}}
+typedef
+typedef vint8m1_t non_int_size2 __attribute__((riscv_rvv_vector_bits("256"))); // expected-error {{'riscv_rvv_vector_bits' attribute requires an integer constant}}
+
+// Attribute must be attached to a single RVV vector or mask type.
+typedef void *badtype1 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'void *'}}
+typedef int badtype2 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'int'}}
+typedef float badtype3 __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute applied to non-RVV type 'float'}}
+
+// Attribute only applies to typedefs.
+vint8m1_t non_typedef_type __attribute__((riscv_rvv_vector_bits(N))); // expected-error {{'riscv_rvv_vector_bits' attribute only applies to typedefs}}
+
+// Test that we can define non-local fixed-length RVV types (unsupported for
+// sizeless types).
+fixed_int8m1_t global_int8;
+
+extern fixed_int8m1_t extern_int8;
+
+static fixed_int8m1_t static_int8;
+
+fixed_int8m1_t *global_int8_ptr;
+extern fixed_int8m1_t *extern_int8_ptr;
+static fixed_int8m1_t *static_int8_ptr;
+__thread fixed_int8m1_t thread_int8;
+
+typedef fixed_int8m1_t int8_typedef;
+typedef fixed_int8m1_t *int8_ptr_typedef;

-typedef uint8_t gnu_uint8_t __attribute__((vector_size(N / 8)));
-typedef uint16_t gnu_uint16_t __attribute__((vector_size(N / 8)));
-typedef uint32_t gnu_uint32_t __attribute__((vector_size(N / 8)));
-typedef uint64_t gnu_uint64_t __attribute__((vector_size(N / 8)));
+// Test sized expressions
+int sizeof_int8 = sizeof(global_int8);
+int sizeof_int8_var = sizeof(*global_int8_ptr);
+int sizeof_int8_var_ptr = sizeof(global_int8_ptr);

-typedef float gnu_float32_t __attribute__((vector_size(N / 8)));
-typedef double gnu_float64_t __attribute__((vector_size(N / 8)));
+extern fixed_int8m1_t *extern_int8_ptr;
+int alignof_int8 = __alignof__(extern_int8);
+int alignof_int8_var = __alignof__(*extern_int8_ptr);
+int alignof_int8_var_ptr = __alignof__(extern_int8_ptr);

 void f(int c) {
+  fixed_int8m1_t fs8;
   vint8m1_t ss8;
-  gnu_int8_t gs8;
+  gnu_int8m1_t gs8;

   // Check conditional expressions where the result is ambiguous are
   // ill-formed.
   void *sel __attribute__((unused));

+  sel = c ? ss8 : fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  sel = c ? fs8 : ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   sel = c ? gs8 : ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
   sel = c ? ss8 : gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  sel = c ? gs8 : fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  sel = c ? fs8 : gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+
+  // Check binary expressions where the result is ambiguous are ill-formed.
+  ss8 = ss8 + fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 = ss8 + gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 + ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 + gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 + ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 + fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

+  ss8 += fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 += gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 += ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 += gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 += ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 += fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

+  ss8 = ss8 == fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
   ss8 = ss8 == gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 == ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 == gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 == ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 == fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

-  ss8 = ss8 & gs8; // expected-error {{invalid operands to binary expression ('vint8m1_t' (aka '__rvv_int8m1_t') and 'gnu_int8_t' (vector of 8 'int8_t' values))}}
+  ss8 = ss8 & fs8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  ss8 = ss8 & gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}

-  gs8 = gs8 & ss8; // expected-error {{invalid operands to binary expression ('gnu_int8_t' (vector of 8 'int8_t' values) and 'vint8m1_t' (aka '__rvv_int8m1_t'))}}
+  fs8 = fs8 & ss8; // expected-error {{cannot combine fixed-length and sizeless RVV vectors in expression, result is ambiguous}}
+  fs8 = fs8 & gs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+
+  gs8 = gs8 & ss8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
+  gs8 = gs8 & fs8; // expected-error {{cannot combine GNU and RVV vectors in expression, result is ambiguous}}
 }
+
+// --------------------------------------------------------------------------//
+// Sizeof
+
+#define VECTOR_SIZE ((N / 8))
+
+_Static_assert(sizeof(fixed_int8m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int16m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_int64m1_t) == VECTOR_SIZE, "");
+
+_Static_assert(sizeof(fixed_uint8m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint16m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_uint64m1_t) == VECTOR_SIZE, "");
+
+_Static_assert(sizeof(fixed_float32m1_t) == VECTOR_SIZE, "");
+_Static_assert(sizeof(fixed_float64m1_t) == VECTOR_SIZE, "");
+
+// --------------------------------------------------------------------------//
+// Alignof
+
+#define VECTOR_ALIGN 8
+
+_Static_assert(__alignof__(fixed_int8m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int16m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_int64m1_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_uint8m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint16m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_uint64m1_t) == VECTOR_ALIGN, "");
+
+_Static_assert(__alignof__(fixed_float32m1_t) == VECTOR_ALIGN, "");
+_Static_assert(__alignof__(fixed_float64m1_t) == VECTOR_ALIGN, "");
+
+// --------------------------------------------------------------------------//
+// Structs
+
+struct struct_int64 { fixed_int64m1_t x, y[5]; };
+struct struct_float64 { fixed_float64m1_t x, y[5]; };
+
+// --------------------------------------------------------------------------//
+// Unions
+union union_int64 { fixed_int64m1_t x, y[5]; };
+union union_float64 { fixed_float64m1_t x, y[5]; };
+
 // --------------------------------------------------------------------------//
 // Implicit casts

-gnu_int8_t to_gnu_int8_t_from_vint8m1_t_(vint8m1_t x) { return x; } // expected-error {{returning 'vint8m1_t' (aka '__rvv_int8m1_t') from a function with incompatible result type 'gnu_int8_t' (vector of 8 'int8_t' values)}}
-vint8m1_t from_gnu_int8_t_to_vint8m1_t(gnu_int8_t x) { return x; } // expected-error {{returning 'gnu_int8_t' (vector of 8 'int8_t' values) from a function with incompatible result type 'vint8m1_t' (aka '__rvv_int8m1_t')}}
+#define TEST_CAST_COMMON(TYPE) \
+  v##TYPE##_t to_v##TYPE##_t_from_fixed(fixed_##TYPE##_t x) { return x; } \
+  fixed_##TYPE##_t from_##TYPE##_t_to_fixed(v##TYPE##_t x) { return x; }
+
+#define TEST_CAST_GNU(PREFIX, TYPE) \
+  gnu_##TYPE##_t to_gnu_##TYPE##_t_from_##PREFIX##TYPE##_t(PREFIX##TYPE##_t x) { return x; } \
+  PREFIX##TYPE##_t from_gnu_##TYPE##_t_to_##PREFIX##TYPE##_t(gnu_##TYPE##_t x) { return x; }
+
+#define TEST_CAST_VECTOR(TYPE) \
+  TEST_CAST_COMMON(TYPE) \
+  TEST_CAST_GNU(v, TYPE) \
+  TEST_CAST_GNU(fixed_, TYPE)
+
+TEST_CAST_VECTOR(int8m1)
+TEST_CAST_VECTOR(int16m1)
+TEST_CAST_VECTOR(int32m1)
+TEST_CAST_VECTOR(int64m1)
+TEST_CAST_VECTOR(uint8m1)
+TEST_CAST_VECTOR(uint16m1)
+TEST_CAST_VECTOR(uint32m1)
+TEST_CAST_VECTOR(uint64m1)
+TEST_CAST_VECTOR(float32m1)
+TEST_CAST_VECTOR(float64m1)

 // --------------------------------------------------------------------------//
-// Test passing GNU vector scalable function
+// Test that the scalable and fixed-length types can be used interchangeably

 vint32m1_t __attribute__((overloadable)) vfunc(vint32m1_t op1, vint32m1_t op2);
 vfloat64m1_t __attribute__((overloadable)) vfunc(vfloat64m1_t op1, vfloat64m1_t op2);

-gnu_int32_t call_int32_ff(gnu_int32_t op1, gnu_int32_t op2) {
-  return vfunc(op1, op2); // expected-error {{no matching function for call to 'vfunc'}}
-  // expected-note@-5 {{candidate function not viable: no known conversion from 'gnu_int32_t' (vector of 2 'int32_t' values) to 'vint32m1_t' (aka '__rvv_int32m1_t') for 1st argument}}
-  // expected-note@-5 {{candidate function not viable: no known conversion from 'gnu_int32_t' (vector of 2 'int32_t' values) to 'vfloat64m1_t' (aka '__rvv_float64m1_t') for 1st argument}}
-}
+#define TEST_CALL(TYPE) \
+  fixed_##TYPE##_t \
+  call_##TYPE##_ff(fixed_##TYPE##_t op1, fixed_##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  } \
+  fixed_##TYPE##_t \
+  call_##TYPE##_fs(fixed_##TYPE##_t op1, v##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  } \
+  fixed_##TYPE##_t \
+  call_##TYPE##_sf(v##TYPE##_t op1, fixed_##TYPE##_t op2) { \
+    return vfunc(op1, op2); \
+  }
+
+TEST_CALL(int32m1)
+TEST_CALL(float64m1)
+
+// --------------------------------------------------------------------------//
+// Vector initialization
+
+#if __RISCV_RVV_VLEN_BITS == 256
+
+typedef vint32m1_t int32x8 __attribute__((riscv_rvv_vector_bits(N)));
+typedef vfloat64m1_t float64x4 __attribute__((riscv_rvv_vector_bits(N)));
+
+int32x8 foo = {1, 2, 3, 4, 5, 6, 7, 8};
+int32x8 foo2 = {1, 2, 3, 4, 5, 6, 7, 8, 9}; // expected-warning{{excess elements in vector initializer}}
+
+float64x4 bar = {1.0, 2.0, 3.0, 4.0};
+float64x4 bar2 = {1.0, 2.0, 3.0, 4.0, 5.0}; // expected-warning{{excess elements in vector initializer}}
+
+#endif
+
+// --------------------------------------------------------------------------//
+// Vector ops
+
+#define TEST_BINARY(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    return op1 OP op2; \
+  } \
+  TYPE compound##NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    op1 OP##= op2; \
+    return op1; \
+  }
+
+#define TEST_COMPARISON(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1, TYPE op2) { \
+    return op1 OP op2; \
+  }
+
+#define TEST_UNARY(TYPE, NAME, OP) \
+  TYPE NAME##_##TYPE(TYPE op1) { \
+    return OP op1; \
+  }
+
+#define TEST_OPS(TYPE) \
+  TEST_BINARY(TYPE, add, +) \
+  TEST_BINARY(TYPE, sub, -) \
+  TEST_BINARY(TYPE, mul, *) \
+  TEST_BINARY(TYPE, div, /) \
+  TEST_COMPARISON(TYPE, eq, ==) \
+  TEST_COMPARISON(TYPE, ne, !=) \
+  TEST_COMPARISON(TYPE, lt, <) \
+  TEST_COMPARISON(TYPE, gt, >) \
+  TEST_COMPARISON(TYPE, lte, <=) \
+  TEST_COMPARISON(TYPE, gte, >=) \
+  TEST_UNARY(TYPE, nop, +) \
+  TEST_UNARY(TYPE, neg, -)
+
+#define TEST_INT_OPS(TYPE) \
+  TEST_OPS(TYPE) \
+  TEST_BINARY(TYPE, mod, %) \
+  TEST_BINARY(TYPE, and, &) \
+  TEST_BINARY(TYPE, or, |) \
+  TEST_BINARY(TYPE, xor, ^) \
+  TEST_BINARY(TYPE, shl, <<) \
+  TEST_BINARY(TYPE, shr, >>) \
+  TEST_UNARY(TYPE, not, ~)
+
+TEST_INT_OPS(fixed_int8m1_t)
+TEST_INT_OPS(fixed_int16m1_t)
+TEST_INT_OPS(fixed_int32m1_t)
+TEST_INT_OPS(fixed_int64m1_t)
+TEST_INT_OPS(fixed_uint8m1_t)
+TEST_INT_OPS(fixed_uint16m1_t)
+TEST_INT_OPS(fixed_uint32m1_t)
+TEST_INT_OPS(fixed_uint64m1_t)
+
+TEST_OPS(fixed_float32m1_t)
+TEST_OPS(fixed_float64m1_t)
diff --git a/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c b/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-rvv-explicit-casts-fixed-size.c
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+
+// expected-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t fixed_float32m1_t FIXED_ATTR;
+typedef vfloat64m1_t fixed_float64m1_t FIXED_ATTR;
+typedef vint32m1_t fixed_int32m1_t FIXED_ATTR;
+typedef vint64m1_t fixed_int64m1_t FIXED_ATTR;
+
+// RVV VLS types can be cast to RVV VLA types, regardless of lane size.
+// NOTE: the list below is NOT exhaustive for all RVV types.
+
+#define CAST(from, to) \
+  void from##_to_##to(from a, to b) { \
+    b = (to) a; \
+  }
+
+#define TESTCASE(ty1, ty2) \
+  CAST(ty1, ty2) \
+  CAST(ty2, ty1)
+
+TESTCASE(fixed_float32m1_t, vfloat32m1_t)
+TESTCASE(fixed_float32m1_t, vfloat64m1_t)
+TESTCASE(fixed_float32m1_t, vint32m1_t)
+TESTCASE(fixed_float32m1_t, vint64m1_t)
+
+TESTCASE(fixed_float64m1_t, vfloat32m1_t)
+TESTCASE(fixed_float64m1_t, vfloat64m1_t)
+TESTCASE(fixed_float64m1_t, vint32m1_t)
+TESTCASE(fixed_float64m1_t, vint64m1_t)
+
+TESTCASE(fixed_int32m1_t, vfloat32m1_t)
+TESTCASE(fixed_int32m1_t, vfloat64m1_t)
+TESTCASE(fixed_int32m1_t, vint32m1_t)
+TESTCASE(fixed_int32m1_t, vint64m1_t)
+
+TESTCASE(fixed_int64m1_t, vfloat32m1_t)
+TESTCASE(fixed_int64m1_t, vfloat64m1_t)
+TESTCASE(fixed_int64m1_t, vint32m1_t)
+TESTCASE(fixed_int64m1_t, vint64m1_t)
diff --git a/clang/test/Sema/riscv-rvv-lax-vector-conversions.c b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-rvv-lax-vector-conversions.c
@@ -0,0 +1,79 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+
+// lax-vector-all-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR;
+typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR;
+typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32m1_t GNU_FIXED_ATTR;
+
+void rvv_allowed_with_integer_lax_conversions() {
+  rvv_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void rvv_allowed_with_all_lax_conversions() {
+  rvv_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_integer_lax_conversions() {
+  gnu_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_all_lax_conversions() {
+  gnu_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
diff --git a/clang/test/Sema/riscv-vector-types-support.c b/clang/test/Sema/riscv-vector-types-support.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Sema/riscv-vector-types-support.c
@@ -0,0 +1,3 @@
+// RUN: %clang_cc1 %s -triple riscv64 -fsyntax-only -verify
+
+typedef __attribute__((riscv_rvv_vector_bits(256))) void norvvflag; // expected-error{{'riscv_rvv_vector_bits' attribute is not supported on targets missing 'zve32x'; specify an appropriate -march= or -mcpu=}}
diff --git a/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp b/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/attr-riscv-rvv-vector-bits.cpp
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +zve64x -ffreestanding -fsyntax-only -verify -std=c++11 -mvscale-min=4 -mvscale-max=4 -Wconversion %s
+// expected-no-diagnostics
+
+#include <stdint.h>
+
+#define N __RISCV_RVV_VLEN_BITS
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef vint8m1_t fixed_int8m1_t __attribute__((riscv_rvv_vector_bits(N)));
+typedef int8_t gnu_int8m1_t __attribute__((vector_size(N / 8)));
+
+template <typename T> struct S { T var; };
+
+S<fixed_int8m1_t> s;
+
+// Test implicit casts between VLA and VLS vectors
+vint8m1_t to_vint8m1_t(fixed_int8m1_t x) { return x; }
+fixed_int8m1_t from_vint8m1_t(vint8m1_t x) { return x; }
+
+// Test implicit casts between GNU and VLA vectors
+vint8m1_t to_vint8m1_t__from_gnu_int8m1_t(gnu_int8m1_t x) { return x; }
+gnu_int8m1_t from_vint8m1_t__to_gnu_int8m1_t(vint8m1_t x) { return x; }
+
+// Test implicit casts between GNU and VLS vectors
+fixed_int8m1_t to_fixed_int8m1_t__from_gnu_int8m1_t(gnu_int8m1_t x) { return x; }
+gnu_int8m1_t from_fixed_int8m1_t__to_gnu_int8m1_t(fixed_int8m1_t x) { return x; }
diff --git a/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp b/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/riscv-rvv-explicit-casts-fixed-size.cpp
@@ -0,0 +1,60 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=1 -mvscale-max=1 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=2 -mvscale-max=2 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=4 -mvscale-max=4 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=16 -mvscale-max=16 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify %s
+
+// REQUIRES: riscv-registered-target
+
+// expected-no-diagnostics
+
+#define N __RISCV_RVV_VLEN_BITS
+#define FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t fixed_float32m1_t FIXED_ATTR;
+typedef vfloat64m1_t fixed_float64m1_t FIXED_ATTR;
+typedef vint32m1_t fixed_int32m1_t FIXED_ATTR;
+typedef vint64m1_t fixed_int64m1_t FIXED_ATTR;
+
+// RVV VLS types can be cast to RVV VLA types, regardless of lane size.
+// NOTE: the list below is NOT exhaustive for all RVV types.
+
+#define CAST(from, to) \
+  void from##_to_##to(from a, to b) { \
+    b = (to) a; \
+  }
+
+#define TESTCASE(ty1, ty2) \
+  CAST(ty1, ty2) \
+  CAST(ty2, ty1)
+
+TESTCASE(fixed_float32m1_t, vfloat32m1_t)
+TESTCASE(fixed_float32m1_t, vfloat64m1_t)
+TESTCASE(fixed_float32m1_t, vint32m1_t)
+TESTCASE(fixed_float32m1_t, vint64m1_t)
+
+TESTCASE(fixed_float64m1_t, vfloat32m1_t)
+TESTCASE(fixed_float64m1_t, vfloat64m1_t)
+TESTCASE(fixed_float64m1_t, vint32m1_t)
+TESTCASE(fixed_float64m1_t, vint64m1_t)
+
+TESTCASE(fixed_int32m1_t, vfloat32m1_t)
+TESTCASE(fixed_int32m1_t, vfloat64m1_t)
+TESTCASE(fixed_int32m1_t, vint32m1_t)
+TESTCASE(fixed_int32m1_t, vint64m1_t)
+
+TESTCASE(fixed_int64m1_t, vfloat32m1_t)
+TESTCASE(fixed_int64m1_t, vfloat64m1_t)
+TESTCASE(fixed_int64m1_t, vint32m1_t)
+TESTCASE(fixed_int64m1_t, vint64m1_t)
diff --git a/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp b/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp
new file mode 100644
--- /dev/null
+++ b/clang/test/SemaCXX/riscv-rvv-lax-vector-conversions.cpp
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=none -ffreestanding -fsyntax-only -verify=lax-vector-none %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=integer -ffreestanding -fsyntax-only -verify=lax-vector-integer %s
+// RUN: %clang_cc1 -triple riscv64-none-linux-gnu -target-feature +f -target-feature +d -target-feature +zve64d -mvscale-min=8 -mvscale-max=8 -flax-vector-conversions=all -ffreestanding -fsyntax-only -verify=lax-vector-all %s
+
+// lax-vector-all-no-diagnostics
+
+// REQUIRES: riscv-registered-target
+
+#define N __RISCV_RVV_VLEN_BITS
+#define RVV_FIXED_ATTR __attribute__((riscv_rvv_vector_bits(N)))
+#define GNU_FIXED_ATTR __attribute__((vector_size(N / 8)))
+
+typedef __rvv_int8m1_t vint8m1_t;
+typedef __rvv_uint8m1_t vuint8m1_t;
+typedef __rvv_int16m1_t vint16m1_t;
+typedef __rvv_uint16m1_t vuint16m1_t;
+typedef __rvv_int32m1_t vint32m1_t;
+typedef __rvv_uint32m1_t vuint32m1_t;
+typedef __rvv_int64m1_t vint64m1_t;
+typedef __rvv_uint64m1_t vuint64m1_t;
+typedef __rvv_float32m1_t vfloat32m1_t;
+typedef __rvv_float64m1_t vfloat64m1_t;
+
+typedef vfloat32m1_t rvv_fixed_float32m1_t RVV_FIXED_ATTR;
+typedef vint32m1_t rvv_fixed_int32m1_t RVV_FIXED_ATTR;
+typedef float gnu_fixed_float32m1_t GNU_FIXED_ATTR;
+typedef int gnu_fixed_int32m1_t GNU_FIXED_ATTR;
+
+void rvv_allowed_with_integer_lax_conversions() {
+  rvv_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void rvv_allowed_with_all_lax_conversions() {
+  rvv_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'rvv_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_integer_lax_conversions() {
+  gnu_fixed_int32m1_t fi32;
+  vint64m1_t si64;
+
+  // The implicit cast here should fail if -flax-vector-conversions=none, but pass if
+  // -flax-vector-conversions={integer,all}.
+  fi32 = si64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_int32m1_t' (vector of 16 'int' values) from incompatible type}}
+  si64 = fi32;
+  // lax-vector-none-error@-1 {{assigning to 'vint64m1_t' (aka '__rvv_int64m1_t') from incompatible type}}
+}
+
+void gnu_allowed_with_all_lax_conversions() {
+  gnu_fixed_float32m1_t ff32;
+  vfloat64m1_t sf64;
+
+  // The implicit cast here should fail if -flax-vector-conversions={none,integer}, but pass if
+  // -flax-vector-conversions=all.
+  ff32 = sf64;
+  // lax-vector-none-error@-1 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'gnu_fixed_float32m1_t' (vector of 16 'float' values) from incompatible type}}
+  sf64 = ff32;
+  // lax-vector-none-error@-1 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+  // lax-vector-integer-error@-2 {{assigning to 'vfloat64m1_t' (aka '__rvv_float64m1_t') from incompatible type}}
+}
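+
+// Illustrative extra check (a sketch mirroring the riscv-rvv-explicit-casts
+// tests above, with a hypothetical helper name): explicit casts between
+// fixed-length and sizeless RVV types stay well-formed in every
+// -flax-vector-conversions= mode.
+vint64m1_t explicit_cast_always_ok(rvv_fixed_int32m1_t x) {
+  return (vint64m1_t)x;
+}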