diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2681,6 +2681,8 @@ bool isSVEBool() const { return getKind() == Kind::SveBool; } + bool isSVECount() const { return getKind() == Kind::SveCount; } + /// Determines whether the given kind corresponds to a placeholder type. static bool isPlaceholderTypeKind(Kind K) { return K >= Overload; @@ -3910,6 +3912,20 @@ /// because TrailingObjects cannot handle repeated types. struct ExceptionType { QualType Type; }; + /// The AArch64 SME ACLE (Arm C/C++ Language Extensions) define a number + /// of function type attributes that can be set on function types, including + /// function pointers. + enum AArch64SMETypeAttributes : unsigned { + SME_NormalFunction = 0, + SME_PStateSMEnabledMask = 1 << 0, + SME_PStateSMCompatibleMask = 1 << 1, + SME_PStateZANewMask = 1 << 2, + SME_PStateZASharedMask = 1 << 3, + SME_PStateZAPreservedMask = 1 << 4, + SME_AttributeMask = 255 // We only support maximum 8 bits because of the + // bitmask in FunctionTypeExtraBitfields + }; + /// A simple holder for various uncommon bits which do not fit in /// FunctionTypeBitfields. Aligned to alignof(void *) to maintain the /// alignment of subsequent objects in TrailingObjects. @@ -3918,6 +3934,13 @@ /// A whole unsigned is not needed here and according to /// [implimits] 8 bits would be enough here. unsigned NumExceptionType = 0; + + /// Any AArch64 SME ACLE type attributes that need to be propagated + /// on declarations and function pointers. 
+ unsigned AArch64SMEAttributes : 8; + + FunctionTypeExtraBitfields() + : AArch64SMEAttributes(SME_NormalFunction) {} }; protected: @@ -4098,16 +4121,20 @@ FunctionType::ExtInfo ExtInfo; bool Variadic : 1; bool HasTrailingReturn : 1; + unsigned AArch64SMEAttributes : 8; Qualifiers TypeQuals; RefQualifierKind RefQualifier = RQ_None; ExceptionSpecInfo ExceptionSpec; const ExtParameterInfo *ExtParameterInfos = nullptr; SourceLocation EllipsisLoc; - ExtProtoInfo() : Variadic(false), HasTrailingReturn(false) {} + ExtProtoInfo() + : Variadic(false), HasTrailingReturn(false), + AArch64SMEAttributes(SME_NormalFunction) {} ExtProtoInfo(CallingConv CC) - : ExtInfo(CC), Variadic(false), HasTrailingReturn(false) {} + : ExtInfo(CC), Variadic(false), HasTrailingReturn(false), + AArch64SMEAttributes(SME_NormalFunction) {} ExtProtoInfo withExceptionSpec(const ExceptionSpecInfo &ESI) { ExtProtoInfo Result(*this); @@ -4116,7 +4143,12 @@ } bool requiresFunctionProtoTypeExtraBitfields() const { - return ExceptionSpec.Type == EST_Dynamic; + return ExceptionSpec.Type == EST_Dynamic || + AArch64SMEAttributes != SME_NormalFunction; + } + + void setArmSMEAttribute(AArch64SMETypeAttributes Kind) { + AArch64SMEAttributes |= Kind; } }; @@ -4243,6 +4275,7 @@ EPI.TypeQuals = getMethodQuals(); EPI.RefQualifier = getRefQualifier(); EPI.ExtParameterInfos = getExtParameterInfosOrNull(); + EPI.AArch64SMEAttributes = getAArch64SMEAttributes(); return EPI; } @@ -4424,6 +4457,14 @@ return getTrailingObjects(); } + /// Return a bitmask describing the SME attributes on the function type, see + /// AArch64SMETypeAttributes for their values. 
+ unsigned getAArch64SMEAttributes() const { + if (!hasExtraBitfields()) + return SME_NormalFunction; + return getTrailingObjects()->AArch64SMEAttributes; + } + ExtParameterInfo getExtParameterInfo(unsigned I) const { assert(I < getNumParams() && "parameter index out of range"); if (hasExtParameterInfos()) diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -323,6 +323,9 @@ ? node->getExtParameterInfos() : llvm::ArrayRef() }]; } + def : Property<"AArch64SMEAttributes", UInt32> { + let Read = [{ node->getAArch64SMEAttributes() }]; + } def : Creator<[{ auto extInfo = FunctionType::ExtInfo(noReturn, hasRegParm, regParm, @@ -338,6 +341,7 @@ epi.ExceptionSpec = exceptionSpecifier; epi.ExtParameterInfos = extParameterInfo.empty() ? nullptr : extParameterInfo.data(); + epi.AArch64SMEAttributes = AArch64SMEAttributes; return ctx.getFunctionType(returnType, parameters, epi); }]>; } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -49,6 +49,11 @@ SVE_TYPE(Name, Id, SingletonId) #endif +#ifndef SVE_OPAQUE_TYPE +#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) \ + SVE_TYPE(Name, Id, SingletonId) +#endif + //===- Vector point types -----------------------------------------------===// @@ -124,7 +129,12 @@ SVE_VECTOR_TYPE("__clang_svbfloat16x4_t", "svbfloat16x4_t", SveBFloat16x4, SveBFloat16x4Ty, 32, 16, true, false, true) SVE_PREDICATE_TYPE("__SVBool_t", "__SVBool_t", SveBool, SveBoolTy, 16) +SVE_PREDICATE_TYPE("__clang_svboolx2_t", "svboolx2_t", SveBoolx2, SveBoolx2Ty, 32) +SVE_PREDICATE_TYPE("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4Ty, 64) + +SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy) #undef 
SVE_VECTOR_TYPE #undef SVE_PREDICATE_TYPE +#undef SVE_OPAQUE_TYPE #undef SVE_TYPE diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -397,6 +397,9 @@ } def TargetARM : TargetArch<["arm", "thumb", "armeb", "thumbeb"]>; def TargetAArch64 : TargetArch<["aarch64"]>; +def TargetAArch64SME : TargetArch<["aarch64"]> { + let CustomCode = [{ Target.hasFeature("sme") }]; +} def TargetAnyArm : TargetArch; def TargetAVR : TargetArch<["avr"]>; def TargetBPF : TargetArch<["bpfel", "bpfeb"]>; @@ -2384,6 +2387,42 @@ let Documentation = [AArch64VectorPcsDocs]; } +def ArmStreamingCompatible : TypeAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_streaming_compatible">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeStreamingCompatibleDocs]; +} + +def ArmStreaming : TypeAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_streaming">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeStreamingDocs]; +} + +def ArmLocallyStreaming : InheritableAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_locally_streaming">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [ArmSmeLocallyStreamingDocs]; +} + +def ArmSharedZA : TypeAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_shared_za">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeSharedZADocs]; +} + +def ArmPreservesZA : TypeAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_preserves_za">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmePreservesZADocs]; +} + +def ArmNewZA : TypeAttr, TargetSpecificAttr { + let Spellings = [GNU<"arm_new_za">]; + let Subjects = SubjectList<[HasFunctionProto], ErrorDiag>; + let Documentation = [ArmSmeNewZADocs]; +} + def AArch64SVEPcs: DeclOrTypeAttr { let Spellings = 
[Clang<"aarch64_sve_pcs">]; let Documentation = [AArch64SVEPcsDocs]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6314,6 +6314,88 @@ }]; } +def ArmSmeStreamingDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``arm_streaming`` attribute is defined by the Arm C Language Extensions +(ACLE) for SME. It is used to mark a function as being a streaming function for +which ``PSTATE.SM`` must be ``1`` on entry and on exit of the function. + +By adding this attribute, Clang will insert the appropriate ``smstart`` and +``smstop`` instructions before and after the call to guarantee that these +conditions are satisfied. + }]; +} + +def ArmSmeLocallyStreamingDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``arm_locally_streaming`` attribute is defined by the Arm C Language Extensions +(ACLE) for SME. It is used to mark a function's body (not the interface) as requiring +``PSTATE.SM`` to be ``1``, although the function is expected to be called with +``PSTATE.SM=0`` and return with ``PSTATE.SM`` unchanged. + +By adding this attribute, Clang will insert the appropriate ``smstart`` and +``smstop`` instructions in the prologue and epilogue of the function. + }]; +} + +def ArmSmeStreamingCompatibleDocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``arm_streaming_compatible`` attribute is defined by the Arm C Language +Extensions (ACLE) for SME. It is used to mark a function as being a streaming +compatible function for which ``PSTATE.SM`` can either be ``0`` or ``1`` at +runtime. Additionally, the ABI specifies that the value of ``PSTATE.SM`` is +passed in register ``X0``. 
+ +By adding this attribute, Clang will pass an implicit parameter with the value +of ``PSTATE.SM`` in ``X0`` to streaming-compatible functions and will insert the +appropriate ``smstart`` and ``smstop`` instructions when there are calls to +other functions that are not streaming compatible. + +Clang will also avoid generating instructions that are illegal in either +streaming mode or normal mode. + }]; +} + +def ArmSmeSharedZADocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``arm_shared_za`` attribute is defined by the Arm C Language Extensions +(ACLE) for SME. It is used to mark a function as sharing the state of ZA, the +accumulator array, with that of its callers. + +By adding this attribute, callers of this function will know that the contents +of ZA may be used for passing or returning data, and can be modified. Clang may +assume that ``PSTATE.ZA`` is ``1`` and will avoid setting up a lazy-save +mechanism for calls to functions marked as ``arm_shared_za``. + }]; +} + +def ArmSmeNewZADocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``arm_new_za`` attribute is defined by the Arm C Language Extensions (ACLE) +for SME. It is used to mark a function as a private ZA function that requires a +new state for ZA. + +By adding this attribute, Clang emits the appropriate ``smstart`` instruction to +allow the use of ZA and will additionally commit a lazy-save if the state of ZA +is dormant. It also emits the appropriate ``smstop`` in the function's epilogue. + }]; +} + +def ArmSmePreservesZADocs : Documentation { + let Category = DocCatType; + let Content = [{ +The ``arm_preserves_za`` attribute is defined by the Arm C Language Extensions +(ACLE) for SME. If a function is marked as ``arm_preserves_za``, it is a hint to +the compiler that the function and any of its callees will preserve the state +of ZA. 
+ }]; +} + def ArmMveStrictPolymorphismDocs : Documentation { let Category = DocCatType; let Content = [{ diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -39,6 +39,7 @@ // A -> "reference" to __builtin_va_list // V -> Vector, followed by the number of elements and the base type. // q -> Scalable vector, followed by the number of elements and the base type. +// Q -> AArch64 svcount_t builtin type. // E -> ext_vector, followed by the number of elements and the base type. // X -> _Complex, followed by the base type. // Y -> ptrdiff_t diff --git a/clang/include/clang/Basic/BuiltinsSME.def b/clang/include/clang/Basic/BuiltinsSME.def new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/BuiltinsSME.def @@ -0,0 +1,20 @@ +//===--- BuiltinsSME.def - SME Builtin function database --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SME-specific builtin function database. Users of +// this file must define the BUILTIN macro to make use of this information. +// +//===----------------------------------------------------------------------===// + +// The format of this database matches clang/Basic/Builtins.def. 
+ +#define GET_SME_BUILTINS +#include "clang/Basic/arm_sme_builtins.inc" +#undef GET_SME_BUILTINS + +#undef BUILTIN diff --git a/clang/include/clang/Basic/CMakeLists.txt b/clang/include/clang/Basic/CMakeLists.txt --- a/clang/include/clang/Basic/CMakeLists.txt +++ b/clang/include/clang/Basic/CMakeLists.txt @@ -72,6 +72,24 @@ clang_tablegen(arm_sve_sema_rangechecks.inc -gen-arm-sve-sema-rangechecks SOURCE arm_sve.td TARGET ClangARMSveSemaRangeChecks) +clang_tablegen(arm_sve_streaming_attrs.inc -gen-arm-sve-streaming-attrs + SOURCE arm_sve.td + TARGET ClangARMSveStreamingAttrs) +clang_tablegen(arm_sme_builtins.inc -gen-arm-sme-builtins + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltins) +clang_tablegen(arm_sme_builtin_cg.inc -gen-arm-sme-builtin-codegen + SOURCE arm_sme.td + TARGET ClangARMSmeBuiltinCG) +clang_tablegen(arm_sme_typeflags.inc -gen-arm-sme-typeflags + SOURCE arm_sme.td + TARGET ClangARMSmeTypeFlags) +clang_tablegen(arm_sme_sema_rangechecks.inc -gen-arm-sme-sema-rangechecks + SOURCE arm_sme.td + TARGET ClangARMSmeSemaRangeChecks) +clang_tablegen(arm_sme_streaming_attrs.inc -gen-arm-sme-streaming-attrs + SOURCE arm_sme.td + TARGET ClangARMSmeStreamingAttrs) clang_tablegen(arm_cde_builtins.inc -gen-arm-cde-builtin-def SOURCE arm_cde.td TARGET ClangARMCdeBuiltinsDef) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2003,6 +2003,10 @@ "than the function it overrides}1,2">; def note_overridden_virtual_function : Note< "overridden virtual function is here">; +def err_conflicting_overriding_attributes : Error< + "virtual function %0 has different attributes " + "%diff{($) than the function it overrides (which has $)|" + "than the function it overrides}1,2">; def err_conflicting_overriding_cc_attributes : Error< "virtual function %0 has different calling convention attributes " "%diff{($) 
than the function it overrides (which has calling convention $)|" @@ -3039,6 +3043,9 @@ def err_attribute_arm_feature_sve_bits_unsupported : Error< "%0 is only supported when '-msve-vector-bits=' is specified with a " "value of 128, 256, 512, 1024 or 2048.">; +def warn_attribute_arm_sm_incompat_builtin : Warning< + "builtin call has undefined behaviour when called from a %0 function">, + InGroup>; def err_attribute_requires_positive_integer : Error< "%0 attribute requires a %select{positive|non-negative}1 " "integral compile time constant expression">; @@ -3546,6 +3553,9 @@ "the vecreturn attribute can only be used on a class or structure with one member, which must be a vector">; def err_attribute_vecreturn_only_pod_record : Error< "the vecreturn attribute can only be used on a POD (plain old data) class or structure (i.e. no virtual functions)">; +def err_sme_attr_mismatch : Error< + "function declared '%0' was previously declared '%1'" + " with different SME function attributes">; def err_cconv_change : Error< "function declared '%0' here was previously declared " "%select{'%2'|without calling convention}1">; diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -53,6 +53,15 @@ }; } + namespace SME { + enum { + LastSVEBuiltin = SVE::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include "clang/Basic/BuiltinsSME.def" + FirstTSBuiltin, + }; + } // namespace SME + /// AArch64 builtins namespace AArch64 { enum { @@ -60,8 +69,10 @@ LastNEONBuiltin = NEON::FirstTSBuiltin - 1, FirstSVEBuiltin = NEON::FirstTSBuiltin, LastSVEBuiltin = SVE::FirstTSBuiltin - 1, - #define BUILTIN(ID, TYPE, ATTRS) BI##ID, - #include "clang/Basic/BuiltinsAArch64.def" + FirstSMEBuiltin = SVE::FirstTSBuiltin, + LastSMEBuiltin = SME::FirstTSBuiltin - 1, +#define BUILTIN(ID, TYPE, ATTRS) BI##ID, +#include 
"clang/Basic/BuiltinsAArch64.def" LastTSBuiltin }; } @@ -282,6 +293,12 @@ bool isTupleCreate() const { return Flags & IsTupleCreate; } bool isTupleGet() const { return Flags & IsTupleGet; } bool isTupleSet() const { return Flags & IsTupleSet; } + bool isReadZA() const { return Flags & IsReadZA; } + bool isWriteZA() const { return Flags & IsWriteZA; } + bool isArmInStreamingMode() const { return Flags & IsArmInStreamingMode; } + bool isZASliceBaseOffsetIntr() const { + return Flags & IsZASliceBaseOffsetIntr; + } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/arm_sme.td @@ -0,0 +1,330 @@ +//===--- arm_sme.td - ARM SME compiler interface ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the TableGen definitions from which the ARM SME header +// file will be generated. See: +// +// https://developer.arm.com/architectures/system-architectures/software-standards/acle +// +//===----------------------------------------------------------------------===// + +include "arm_sve_common.td" + +class Inst ft, list ch, MemEltType met = MemEltTyDefault> { + string Name = n; + string Prototype = p; + string Types = t; + string ArchGuard = ""; + int Merge = MergeNone.Value; + string MergeSuffix = MergeNone.Suffix; + string LLVMIntrinsic = i; + list Flags = ft; + list ImmChecks = ch; + int MemEltType = met.Value; +} + +// MBaseInst: Memory base instruction. +class MBaseInst f, + MemEltType met, list ch> + : Inst { +} + +// MLInst: Memory load instruction. 
+class MLInst ch> + : MBaseInst { +} + +// MSInst: Memory store instruction. +class MSInst ch> + : MBaseInst { +} + +multiclass LoadHV ch> { + def NAME # _H : MLInst<"svld1_hor_" # n, p, met, i # "_horiz", ch>; + def NAME # _V : MLInst<"svld1_ver_" # n, p, met, i # "_vert", ch>; +} + +multiclass StoreHV ch> { + def NAME # _H : MSInst<"svst1_hor_" # n, p, met, i # "_horiz", ch>; + def NAME # _V : MSInst<"svst1_ver_" # n, p, met, i # "_vert", ch>; +} + +class ReadInst ch> + : Inst { +} + +class WriteInst ch> + : Inst { +} + +multiclass ReadHV ch> { + def NAME # _H : ReadInst<"svread_hor_" # n, p, t, i # "_horiz", ch>; + def NAME # _V : ReadInst<"svread_ver_" # n, p, t, i # "_vert", ch>; +} + +multiclass WriteHV ch> { + def NAME # _H : WriteInst<"svwrite_hor_" # n, p, t, i # "_horiz", ch>; + def NAME # _V : WriteInst<"svwrite_ver_" # n, p, t, i # "_vert", ch>; +} + +multiclass AddHV ch> { + def NAME # _H : Inst<"svaddha_" # n, p, t, i # "ha", [IsStreaming, IsSharedZA], ch>; + def NAME # _V : Inst<"svaddva_" # n, p, t, i # "va", [IsStreaming, IsSharedZA], ch>; +} + +multiclass MopAS ch> { + def NAME # _A : Inst<"svmopa_" # n, "viPPdd", t, "aarch64_sme_" # i # "a" # w, [IsStreaming, IsSharedZA], ch>; + def NAME # _S : Inst<"svmops_" # n, "viPPdd", t, "aarch64_sme_" # i # "s" # w, [IsStreaming, IsSharedZA], ch>; +} + +multiclass MixedSignMopAS ch> { + def NAME # _A : Inst; + def NAME # _S : Inst; +} + +// == LOADS == + +defm SVLD1_ZA8 : LoadHV<"za8", "vimiPQ", MemEltTyInt8, "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVLD1_ZA16 : LoadHV<"za16", "vimiPQ", MemEltTyInt16, "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVLD1_ZA32 : LoadHV<"za32", "vimiPQ", MemEltTyInt32, "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVLD1_ZA64 : LoadHV<"za64", "vimiPQ", MemEltTyInt64, "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVLD1_ZA128 : 
LoadHV<"za128", "vimiPQ", MemEltTyInt128, "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0>]>; + +defm SVLD1_VNUM_ZA8 : LoadHV<"vnum_za8", "vimiPQl", MemEltTyInt8, "aarch64_sme_ld1b", [ImmCheck<0, ImmCheck0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVLD1_VNUM_ZA16 : LoadHV<"vnum_za16", "vimiPQl", MemEltTyInt16, "aarch64_sme_ld1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVLD1_VNUM_ZA32 : LoadHV<"vnum_za32", "vimiPQl", MemEltTyInt32, "aarch64_sme_ld1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVLD1_VNUM_ZA64 : LoadHV<"vnum_za64", "vimiPQl", MemEltTyInt64, "aarch64_sme_ld1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVLD1_VNUM_ZA128 : LoadHV<"vnum_za128", "vimiPQl", MemEltTyInt128, "aarch64_sme_ld1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0>]>; + +def SVLDR_VNUM_ZA : MLInst<"svldr_vnum_za", "vmiPQ", MemEltTyInt8, "aarch64_sme_ldr", [ImmCheck<1, ImmCheck0_15>]>; + +// == STORES == + +defm SVST1_ZA8 : StoreHV<"za8", "vimiP{", MemEltTyInt8, "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVST1_ZA16 : StoreHV<"za16", "vimiP{", MemEltTyInt16, "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVST1_ZA32 : StoreHV<"za32", "vimiP{", MemEltTyInt32, "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVST1_ZA64 : StoreHV<"za64", "vimiP{", MemEltTyInt64, "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVST1_ZA128 : StoreHV<"za128", "vimiP{", MemEltTyInt128, "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0>]>; + +defm SVST1_VNUM_ZA8 : StoreHV<"vnum_za8", "vimiP{l", MemEltTyInt8, "aarch64_sme_st1b", [ImmCheck<0, ImmCheck0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVST1_VNUM_ZA16 : StoreHV<"vnum_za16", "vimiP{l", MemEltTyInt16, "aarch64_sme_st1h", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVST1_VNUM_ZA32 : StoreHV<"vnum_za32", "vimiP{l", 
MemEltTyInt32, "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVST1_VNUM_ZA64 : StoreHV<"vnum_za64", "vimiP{l", MemEltTyInt64, "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVST1_VNUM_ZA128 : StoreHV<"vnum_za128", "vimiP{l", MemEltTyInt128, "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0>]>; + +def SVSTR_VNUM_ZA : MSInst<"svstr_vnum_za", "vmiP{", MemEltTyInt8, "aarch64_sme_str", [ImmCheck<1, ImmCheck0_15>]>; + +// == MOVA/READS == + +defm SVREAD_ZA8 : ReadHV<"za8_{d}_m", "ddPimi", "cUc", "aarch64_sme_read", [ImmCheck<2, ImmCheck0>, ImmCheck<4, ImmCheck0_15>]>; +defm SVREAD_ZA16 : ReadHV<"za16_{d}_m", "ddPimi", "sUshb", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>; +defm SVREAD_ZA32 : ReadHV<"za32_{d}_m", "ddPimi", "iUif", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ReadHV<"za64_{d}_m", "ddPimi", "lUld", "aarch64_sme_read", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; +defm SVREAD_ZA128 : ReadHV<"za128_{d}_m", "ddPimi", "cUcsUshbiUiflUld", "aarch64_sme_readq", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0>]>; + +// == MOVA/WRITES == + +defm SVWRITE_ZA8 : WriteHV<"za8_{d}_m", "vimiPd", "cUc", "aarch64_sme_write", [ImmCheck<0, ImmCheck0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVWRITE_ZA16 : WriteHV<"za16_{d}_m", "vimiPd", "sUshb", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVWRITE_ZA32 : WriteHV<"za32_{d}_m", "vimiPd", "iUif", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : WriteHV<"za64_{d}_m", "vimiPd", "lUld", "aarch64_sme_write", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVWRITE_ZA128 : WriteHV<"za128_{d}_m", "vimiPd", "cUcsUshbiUiflUld", "aarch64_sme_writeq", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0>]>; + +// == SVCNT == + +def SVCNTSB : Inst<"svcntsb", "nv", "", 
"aarch64_sme_cntsb", [IsOverloadNone, IsStreamingCompatible, IsPreservedZA], []>; +def SVCNTSH : Inst<"svcntsh", "nv", "", "aarch64_sme_cntsh", [IsOverloadNone, IsStreamingCompatible, IsPreservedZA], []>; +def SVCNTSW : Inst<"svcntsw", "nv", "", "aarch64_sme_cntsw", [IsOverloadNone, IsStreamingCompatible, IsPreservedZA], []>; +def SVCNTSD : Inst<"svcntsd", "nv", "", "aarch64_sme_cntsd", [IsOverloadNone, IsStreamingCompatible, IsPreservedZA], []>; + +// == ZERO == + +def SVZERO_MASK : Inst<"svzero_mask_za", "vi", "", "aarch64_sme_zero", [IsOverloadNone, IsStreaming, IsSharedZA], [ImmCheck<0, ImmCheck0_255>]>; + +// == PSTATE FUNCTIONS == +def IN_STREAMING_MODE : Inst<"__arm_in_streaming_mode", "yv", "", "", [IsOverloadNone, IsStreamingCompatible, IsArmInStreamingMode], []>; + +// == ADDHA/ADDVA == + +defm SVADD_ZA32 : AddHV<"za32_{d}", "viPPd", "iUi", "aarch64_sme_add", [ImmCheck<0, ImmCheck0_3>]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I64I64)" in { + defm SVADD_ZA64 : AddHV<"za64_{d}", "viPPd", "lUl", "aarch64_sme_add", [ImmCheck<0, ImmCheck0_7>]>; +} + +// == MOPA / MOPS == + +defm SVMOP_WIDE_ZA32_FP : MopAS<"za32_{d}", "hb", "mop", "_wide", [ImmCheck<0, ImmCheck0_3>]>; +defm SVMOP_WIDE_ZA32_S8 : MopAS<"za32_{d}", "c", "smop", "_wide", [ImmCheck<0, ImmCheck0_3>]>; +defm SVMOP_WIDE_ZA32_U8 : MopAS<"za32_{d}", "Uc", "umop", "_wide", [ImmCheck<0, ImmCheck0_3>]>; +defm SVMOP_ZA32_FP : MopAS<"za32_{d}", "f", "mop", "", [ImmCheck<0, ImmCheck0_3>]>; + +let ArchGuard = "defined(__ARM_FEATURE_SME_I64I64)" in { + defm SVMOPA_WIDE_ZA64_S16 : MopAS<"za64_{d}", "s", "smop", "_wide", [ImmCheck<0, ImmCheck0_7>]>; + defm SVMOPA_WIDE_ZA64_U16 : MopAS<"za64_{d}", "Us", "umop", "_wide", [ImmCheck<0, ImmCheck0_7>]>; +} + +let ArchGuard = "defined(__ARM_FEATURE_SME_F64F64)" in { + defm SVMOP_ZA64_FP : MopAS<"za64_{d}", "d", "mop", "", [ImmCheck<0, ImmCheck0_7>]>; +} + +// == SUMOPA / SUMOPS / USMOPS / USMOPA == + +defm SVSUMOP_ZA32_S8 : MixedSignMopAS<"svsumop", "za32_{d}", "c", 
"aarch64_sme_sumop", [ImmCheck<0, ImmCheck0_3>]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I64I64)" in { + defm SVSUMOP_ZA64_S16 : MixedSignMopAS<"svsumop", "za64_{d}", "s", "aarch64_sme_sumop", [ImmCheck<0, ImmCheck0_7>]>; +} + +defm SVUSMOP_ZA32_U8 : MixedSignMopAS<"svusmop", "za32_{d}", "Uc", "aarch64_sme_usmop", [ImmCheck<0, ImmCheck0_3>]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I64I64)" in { + defm SVUSMOP_ZA64_U16 : MixedSignMopAS<"svusmop", "za64_{d}", "Us", "aarch64_sme_usmop", [ImmCheck<0, ImmCheck0_7>]>; +} + +// FMLA/FMLS +let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { + def SVMLA_MULTI_VG1x2_F32 : Inst<"svmla_za32[_{d}]_vg1x2", "vmi22", "f", "aarch64_sme_fmla_multi_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLA_MULTI_VG1x4_F32 : Inst<"svmla_za32[_{d}]_vg1x4", "vmi44", "f", "aarch64_sme_fmla_multi_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_MULTI_VG1x2_F32 : Inst<"svmls_za32[_{d}]_vg1x2", "vmi22", "f", "aarch64_sme_fmls_multi_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_MULTI_VG1x4_F32 : Inst<"svmls_za32[_{d}]_vg1x4", "vmi44", "f", "aarch64_sme_fmls_multi_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + + def SVMLA_SINGLE_VG1x2_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x2", "vmi2d", "f", "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLA_SINGLE_VG1x4_F32 : Inst<"svmla[_single]_za32[_{d}]_vg1x4", "vmi4d", "f", "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_SINGLE_VG1x2_F32 : Inst<"svmls[_single]_za32[_{d}]_vg1x2", "vmi2d", "f", "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_SINGLE_VG1x4_F32 : 
Inst<"svmls[_single]_za32[_{d}]_vg1x4", "vmi4d", "f", "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + + def SVMLA_LANE_VG1x2_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x2", "vmi2di", "f", "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_3>]>; + def SVMLA_LANE_VG1x4_F32 : Inst<"svmla_lane_za32[_{d}]_vg1x4", "vmi4di", "f", "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_3>]>; + def SVMLS_LANE_VG1x2_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x2", "vmi2di", "f", "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_3>]>; + def SVMLS_LANE_VG1x4_F32 : Inst<"svmls_lane_za32[_{d}]_vg1x4", "vmi4di", "f", "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_3>]>; +} + +let ArchGuard = "defined(__ARM_FEATURE_SME2) && defined(__ARM_FEATURE_SME_F64F64)" in { + def SVMLA_MULTI_VG1x2_F64 : Inst<"svmla_za64[_{d}]_vg1x2", "vmi22", "d", "aarch64_sme_fmla_multi_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLA_MULTI_VG1x4_F64 : Inst<"svmla_za64[_{d}]_vg1x4", "vmi44", "d", "aarch64_sme_fmla_multi_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_MULTI_VG1x2_F64 : Inst<"svmls_za64[_{d}]_vg1x2", "vmi22", "d", "aarch64_sme_fmls_multi_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_MULTI_VG1x4_F64 : Inst<"svmls_za64[_{d}]_vg1x4", "vmi44", "d", "aarch64_sme_fmls_multi_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + + def SVMLA_SINGLE_VG1x2_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x2", "vmi2d", "d", 
"aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLA_SINGLE_VG1x4_F64 : Inst<"svmla[_single]_za64[_{d}]_vg1x4", "vmi4d", "d", "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_SINGLE_VG1x2_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x2", "vmi2d", "d", "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVMLS_SINGLE_VG1x4_F64 : Inst<"svmls[_single]_za64[_{d}]_vg1x4", "vmi4d", "d", "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + + def SVMLA_LANE_VG1x2_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x2", "vmi2di", "d", "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; + def SVMLA_LANE_VG1x4_F64 : Inst<"svmla_lane_za64[_{d}]_vg1x4", "vmi4di", "d", "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; + def SVMLS_LANE_VG1x2_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x2", "vmi2di", "d", "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; + def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vmi4di", "d", "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; +} + +// FMLAL/FMLSL/UMLAL/SMLAL +let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { + // MULTI MLAL + def SVMLAL_MULTI_VG2x2_F16 : Inst<"svmlal_za32[_{d}]_vg2x2", "vmi22", "bh", "aarch64_sme_fmlal_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_MULTI_VG2x4_F16 : Inst<"svmlal_za32[_{d}]_vg2x4", "vmi44", "bh", "aarch64_sme_fmlal_multi_vg2x4", 
[IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_MULTI_VG2x2_S16 : Inst<"svmlal_za32[_{d}]_vg2x2", "vmi22", "s", "aarch64_sme_smlal_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_MULTI_VG2x4_S16 : Inst<"svmlal_za32[_{d}]_vg2x4", "vmi44", "s", "aarch64_sme_smlal_multi_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_MULTI_VG2x2_U16 : Inst<"svmlal_za32[_{d}]_vg2x2", "vmi22", "Us", "aarch64_sme_umlal_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_MULTI_VG2x4_U16 : Inst<"svmlal_za32[_{d}]_vg2x4", "vmi44", "Us", "aarch64_sme_umlal_multi_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + + // MULTI MLSL + def SVMLSL_MULTI_VG2x2_F16 : Inst<"svmlsl_za32[_{d}]_vg2x2", "vmi22", "bh", "aarch64_sme_fmlsl_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_MULTI_VG2x4_F16 : Inst<"svmlsl_za32[_{d}]_vg2x4", "vmi44", "bh", "aarch64_sme_fmlsl_multi_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_MULTI_VG2x2_S16 : Inst<"svmlsl_za32[_{d}]_vg2x2", "vmi22", "s", "aarch64_sme_smlsl_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_MULTI_VG2x4_S16 : Inst<"svmlsl_za32[_{d}]_vg2x4", "vmi44", "s", "aarch64_sme_smlsl_multi_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_MULTI_VG2x2_U16 : Inst<"svmlsl_za32[_{d}]_vg2x2", "vmi22", "Us", "aarch64_sme_umlsl_multi_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_MULTI_VG2x4_U16 : Inst<"svmlsl_za32[_{d}]_vg2x4", "vmi44", "Us", "aarch64_sme_umlsl_multi_vg2x4", [IsStreaming, 
IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + + // SINGLE MLAL + def SVMLAL_SINGLE_VG2x1_F16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x1", "vmidd", "bh", "aarch64_sme_fmlal_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLAL_SINGLE_VG2x2_F16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x2", "vmi2d", "bh", "aarch64_sme_fmlal_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_SINGLE_VG2x4_F16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x4", "vmi4d", "bh", "aarch64_sme_fmlal_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_SINGLE_VG2x1_S16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x1", "vmidd", "s", "aarch64_sme_smlal_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLAL_SINGLE_VG2x2_S16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x2", "vmi2d", "s", "aarch64_sme_smlal_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_SINGLE_VG2x4_S16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x4", "vmi4d", "s", "aarch64_sme_smlal_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_SINGLE_VG2x1_U16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x1", "vmidd", "Us", "aarch64_sme_umlal_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLAL_SINGLE_VG2x2_U16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x2", "vmi2d", "Us", "aarch64_sme_umlal_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLAL_SINGLE_VG2x4_U16 : Inst<"svmlal[_single]_za32[_{d}]_vg2x4", "vmi4d", "Us", "aarch64_sme_umlal_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + + // SINGLE MLSL + def 
SVMLSL_SINGLE_VG2x1_F16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x1", "vmidd", "bh", "aarch64_sme_fmlsl_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLSL_SINGLE_VG2x2_F16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x2", "vmi2d", "bh", "aarch64_sme_fmlsl_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_SINGLE_VG2x4_F16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x4", "vmi4d", "bh", "aarch64_sme_fmlsl_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_SINGLE_VG2x1_S16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x1", "vmidd", "s", "aarch64_sme_smlsl_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLSL_SINGLE_VG2x2_S16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x2", "vmi2d", "s", "aarch64_sme_smlsl_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_SINGLE_VG2x4_S16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x4", "vmi4d", "s", "aarch64_sme_smlsl_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_SINGLE_VG2x1_U16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x1", "vmidd", "Us", "aarch64_sme_umlsl_single_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>]>; + def SVMLSL_SINGLE_VG2x2_U16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x2", "vmi2d", "Us", "aarch64_sme_umlsl_single_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + def SVMLSL_SINGLE_VG2x4_U16 : Inst<"svmlsl[_single]_za32[_{d}]_vg2x4", "vmi4d", "Us", "aarch64_sme_umlsl_single_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>]>; + + // INDEXED MLAL + def SVMLAL_LANE_VG2x1_F16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x1", "vmiddi", "bh", 
"aarch64_sme_fmlal_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x2_F16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x2", "vmi2di", "bh", "aarch64_sme_fmlal_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x4_F16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x4", "vmi4di", "bh", "aarch64_sme_fmlal_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x1_S16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x1", "vmiddi", "s", "aarch64_sme_smlal_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x2_S16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x2", "vmi2di", "s", "aarch64_sme_smlal_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x4_S16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x4", "vmi4di", "s", "aarch64_sme_smlal_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x1_U16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x1", "vmiddi", "Us", "aarch64_sme_umlal_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x2_U16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x2", "vmi2di", "Us", "aarch64_sme_umlal_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLAL_LANE_VG2x4_U16 : Inst<"svmlal[_lane]_za32[_{d}]_vg2x4", "vmi4di", "Us", "aarch64_sme_umlal_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + + // INDEXED MLSL + 
def SVMLSL_LANE_VG2x1_F16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x1", "vmiddi", "bh", "aarch64_sme_fmlsl_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x2_F16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x2", "vmi2di", "bh", "aarch64_sme_fmlsl_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x4_F16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x4", "vmi4di", "bh", "aarch64_sme_fmlsl_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x1_S16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x1", "vmiddi", "s", "aarch64_sme_smlsl_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x2_S16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x2", "vmi2di", "s", "aarch64_sme_smlsl_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x4_S16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x4", "vmi4di", "s", "aarch64_sme_smlsl_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x1_U16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x1", "vmiddi", "Us", "aarch64_sme_umlsl_lane_vg2x1", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_14_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x2_U16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x2", "vmi2di", "Us", "aarch64_sme_umlsl_lane_vg2x2", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; + def SVMLSL_LANE_VG2x4_U16 : Inst<"svmlsl[_lane]_za32[_{d}]_vg2x4", "vmi4di", "Us", "aarch64_sme_umlsl_lane_vg2x4", [IsStreaming, IsSharedZA, IsZASliceBaseOffsetIntr], 
[ImmCheck<1, ImmCheck0_6_Mul2>, ImmCheck<4, ImmCheck0_7>]>; +} + +// +// 2 and 4 vector-group read/write intrinsics. +// + +multiclass WriteHV_VG<string n, string t, string i, list<ImmCheck> checkvg2, list<ImmCheck> checkvg4> { + def NAME # _VG2_H : Inst<"svwrite_hor_" # n # "_vg2", "vmi2", t, i # "_hor_vg2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg2>; + def NAME # _VG2_V : Inst<"svwrite_ver_" # n # "_vg2", "vmi2", t, i # "_ver_vg2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg2>; + def NAME # _VG4_H : Inst<"svwrite_hor_" # n # "_vg4", "vmi4", t, i # "_hor_vg4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg4>; + def NAME # _VG4_V : Inst<"svwrite_ver_" # n # "_vg4", "vmi4", t, i # "_ver_vg4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg4>; +} + +let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { + defm SVWRITE_ZA8 : WriteHV_VG<"za8[_{d}]", "cUc", "aarch64_sme_write", [ImmCheck<1, ImmCheck0_14_Mul2>], [ImmCheck<1, ImmCheck0_12_Mul4>]>; + defm SVWRITE_ZA16 : WriteHV_VG<"za16[_{d}]", "sUshb", "aarch64_sme_write", [ImmCheck<1, ImmCheck0_6_Mul2>], [ImmCheck<1, ImmCheck0_4_Mul4>]>; + defm SVWRITE_ZA32 : WriteHV_VG<"za32[_{d}]", "iUif", "aarch64_sme_write", [ImmCheck<1, ImmCheck0_2_Mul2>], [ImmCheck<1, ImmCheck0>]>; + defm SVWRITE_ZA64 : WriteHV_VG<"za64[_{d}]", "lUld", "aarch64_sme_write", [ImmCheck<1, ImmCheck0>], [ImmCheck<1, ImmCheck0>]>; +} + +multiclass ReadHV_VG<string n, string t, string i, list<ImmCheck> checkvg2, list<ImmCheck> checkvg4> { + def NAME # _VG2_H : Inst<"svread_hor_" # n # "_vg2", "2mi", t, i # "_hor_vg2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg2>; + def NAME # _VG2_V : Inst<"svread_ver_" # n # "_vg2", "2mi", t, i # "_ver_vg2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg2>; + def NAME # _VG4_H : Inst<"svread_hor_" # n # "_vg4", "4mi", t, i # "_hor_vg4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg4>; + def NAME # _VG4_V : Inst<"svread_ver_" # n # "_vg4", "4mi", t, i # "_ver_vg4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], checkvg4>; +} + 
+let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { + defm SVREAD_ZA8 : ReadHV_VG<"za8_{d}", "cUc", "aarch64_sme_read", [ImmCheck<1, ImmCheck0_14_Mul2>], [ImmCheck<1, ImmCheck0_12_Mul4>]>; + defm SVREAD_ZA16 : ReadHV_VG<"za16_{d}", "sUshb", "aarch64_sme_read", [ImmCheck<1, ImmCheck0_6_Mul2>], [ImmCheck<1, ImmCheck0_4_Mul4>]>; + defm SVREAD_ZA32 : ReadHV_VG<"za32_{d}", "iUif", "aarch64_sme_read", [ImmCheck<1, ImmCheck0_2_Mul2>], [ImmCheck<1, ImmCheck0>]>; + defm SVREAD_ZA64 : ReadHV_VG<"za64_{d}", "lUld", "aarch64_sme_read", [ImmCheck<1, ImmCheck0>], [ImmCheck<1, ImmCheck0>]>; +} + + +// +// Single vector-group read/write intrinsics. +// + +let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { + def SVWRITE_ZA64_VG1x2 : Inst<"svwrite_za64[_{d}]_vg1x2", "vmi2", "lUld", "aarch64_sme_write_vg1x2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVWRITE_ZA64_VG1x4 : Inst<"svwrite_za64[_{d}]_vg1x4", "vmi4", "lUld", "aarch64_sme_write_vg1x4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVREAD_ZA64_VG1x2 : Inst<"svread_za64_{d}_vg1x2", "2mi", "lUld", "aarch64_sme_read_vg1x2", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; + def SVREAD_ZA64_VG1x4 : Inst<"svread_za64_{d}_vg1x4", "4mi", "lUld", "aarch64_sme_read_vg1x4", [IsSharedZA, IsStreaming, IsZASliceBaseOffsetIntr], [ImmCheck<1, ImmCheck0_7>]>; +} diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -13,225 +13,7 @@ // //===----------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Instruction definitions -//===----------------------------------------------------------------------===// -// Every intrinsic subclasses "Inst". 
An intrinsic has a name, a prototype and -// a sequence of typespecs. -// -// The name is the base name of the intrinsic, for example "svld1". This is -// then mangled by the tblgen backend to add type information ("svld1_s16"). -// -// A typespec is a sequence of uppercase characters (modifiers) followed by one -// lowercase character. A typespec encodes a particular "base type" of the -// intrinsic. -// -// An example typespec is "Us" - unsigned short - svuint16_t. The available -// typespec codes are given below. -// -// The string given to an Inst class is a sequence of typespecs. The intrinsic -// is instantiated for every typespec in the sequence. For example "sdUsUd". -// -// The prototype is a string that defines the return type of the intrinsic -// and the type of each argument. The return type and every argument gets a -// "modifier" that can change in some way the "base type" of the intrinsic. -// -// The modifier 'd' means "default" and does not modify the base type in any -// way. The available modifiers are given below. -// -// Typespecs -// --------- -// c: char -// s: short -// i: int -// l: long -// f: float -// h: half-float -// d: double -// b: bfloat - -// Typespec modifiers -// ------------------ -// P: boolean -// U: unsigned - -// Prototype modifiers -// ------------------- -// prototype: return (arg, arg, ...) 
-// -// 2,3,4: array of default vectors -// v: void -// x: vector of signed integers -// u: vector of unsigned integers -// d: default -// c: const pointer type -// P: predicate type -// s: scalar of element type -// a: scalar of element type (splat to vector type) -// R: scalar of 1/2 width element type (splat to vector type) -// r: scalar of 1/4 width element type (splat to vector type) -// @: unsigned scalar of 1/4 width element type (splat to vector type) -// e: 1/2 width unsigned elements, 2x element count -// b: 1/4 width unsigned elements, 4x element count -// h: 1/2 width elements, 2x element count -// q: 1/4 width elements, 4x element count -// o: 4x width elements, 1/4 element count -// -// w: vector of element type promoted to 64bits, vector maintains -// signedness of its element type. -// f: element type promoted to uint64_t (splat to vector type) -// j: element type promoted to 64bits (splat to vector type) -// K: element type bitcast to a signed integer (splat to vector type) -// L: element type bitcast to an unsigned integer (splat to vector type) -// -// i: constant uint64_t -// k: int32_t -// l: int64_t -// m: uint32_t -// n: uint64_t - -// t: svint32_t -// z: svuint32_t -// g: svuint64_t -// O: svfloat16_t -// M: svfloat32_t -// N: svfloat64_t - -// J: Prefetch type (sv_prfop) -// A: pointer to int8_t -// B: pointer to int16_t -// C: pointer to int32_t -// D: pointer to int64_t - -// E: pointer to uint8_t -// F: pointer to uint16_t -// G: pointer to uint32_t -// H: pointer to uint64_t - -// Q: const pointer to void - -// S: const pointer to int8_t -// T: const pointer to int16_t -// U: const pointer to int32_t -// V: const pointer to int64_t -// -// W: const pointer to uint8_t -// X: const pointer to uint16_t -// Y: const pointer to uint32_t -// Z: const pointer to uint64_t - -class MergeType { - int Value = val; - string Suffix = suffix; -} -def MergeNone : MergeType<0>; -def MergeAny : MergeType<1, "_x">; -def MergeOp1 : MergeType<2, "_m">; 
-def MergeZero : MergeType<3, "_z">; -def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit -def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument. - -class EltType<int val> { - int Value = val; -} -def EltTyInvalid : EltType<0>; -def EltTyInt8 : EltType<1>; -def EltTyInt16 : EltType<2>; -def EltTyInt32 : EltType<3>; -def EltTyInt64 : EltType<4>; -def EltTyFloat16 : EltType<5>; -def EltTyFloat32 : EltType<6>; -def EltTyFloat64 : EltType<7>; -def EltTyBool8 : EltType<8>; -def EltTyBool16 : EltType<9>; -def EltTyBool32 : EltType<10>; -def EltTyBool64 : EltType<11>; -def EltTyBFloat16 : EltType<12>; - -class MemEltType<int val> { - int Value = val; -} -def MemEltTyDefault : MemEltType<0>; -def MemEltTyInt8 : MemEltType<1>; -def MemEltTyInt16 : MemEltType<2>; -def MemEltTyInt32 : MemEltType<3>; -def MemEltTyInt64 : MemEltType<4>; - -class FlagType<int val> { - int Value = val; -} - -// These must be kept in sync with the flags in utils/TableGen/SveEmitter.h -// and include/clang/Basic/TargetBuiltins.h -def NoFlags : FlagType<0x00000000>; -def FirstEltType : FlagType<0x00000001>; -// : : -// : : -def EltTypeMask : FlagType<0x0000000f>; -def FirstMemEltType : FlagType<0x00000010>; -// : : -// : : -def MemEltTypeMask : FlagType<0x00000070>; -def FirstMergeTypeMask : FlagType<0x00000080>; -// : : -// : : -def MergeTypeMask : FlagType<0x00000380>; -def FirstSplatOperand : FlagType<0x00000400>; -// : : -// These flags are used to specify which scalar operand -// needs to be duplicated/splatted into a vector. 
-// : : -def SplatOperandMask : FlagType<0x00001C00>; -def IsLoad : FlagType<0x00002000>; -def IsStore : FlagType<0x00004000>; -def IsGatherLoad : FlagType<0x00008000>; -def IsScatterStore : FlagType<0x00010000>; -def IsStructLoad : FlagType<0x00020000>; -def IsStructStore : FlagType<0x00040000>; -def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default -def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. -def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. -def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. -def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. -def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. -def IsByteIndexed : FlagType<0x01000000>; -def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. -def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. -def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. -def IsGatherPrefetch : FlagType<0x10000000>; -def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. -def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. -def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. -def IsTupleCreate : FlagType<0x100000000>; -def IsTupleGet : FlagType<0x200000000>; -def IsTupleSet : FlagType<0x400000000>; - -// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h -class ImmCheckType<int val> { - int Value = val; -} -def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. 
predicate patterns) -def ImmCheck1_16 : ImmCheckType<1>; // 1..16 -def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1) -def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) -def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2 -def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) -def ImmCheck0_7 : ImmCheckType<6>; // 0..7 -def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1) -def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1) -def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) -def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270] -def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] -def ImmCheck0_13 : ImmCheckType<12>; // 0..13 -def ImmCheck0_1 : ImmCheckType<13>; // 0..1 -def ImmCheck0_2 : ImmCheckType<14>; // 0..2 -def ImmCheck0_3 : ImmCheckType<15>; // 0..3 - -class ImmCheck { - int Arg = arg; - int EltSizeArg = eltSizeArg; - ImmCheckType Kind = kind; -} +include "arm_sve_common.td" class Inst ft, list ch, MemEltType met> { @@ -263,27 +45,27 @@ // Loads // Load one vector (scalar base) -def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1 : MInst<"svld1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, 
IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1SB : MInst<"svld1sb_{d}", "dPS", "silUsUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1UB : MInst<"svld1ub_{d}", "dPW", "silUsUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH : MInst<"svld1sh_{d}", "dPT", "ilUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH : MInst<"svld1uh_{d}", "dPX", "ilUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW : MInst<"svld1sw_{d}", "dPU", "lUl", [IsLoad, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW : MInst<"svld1uw_{d}", "dPY", "lUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; - def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; + def SVLD1_BF : MInst<"svld1[_{2}]", "dPc", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; + def SVLD1_VNUM_BF : MInst<"svld1_vnum[_{2}]", "dPcl", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; } // Load one vector (scalar base, VL displacement) -def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ld1">; -def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn], MemEltTyInt8, "aarch64_sve_ld1">; -def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn], MemEltTyInt16, "aarch64_sve_ld1">; -def SVLD1SW_VNUM : 
MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad], MemEltTyInt32, "aarch64_sve_ld1">; -def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1_VNUM : MInst<"svld1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ld1">; +def SVLD1SB_VNUM : MInst<"svld1sb_vnum_{d}", "dPSl", "silUsUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1UB_VNUM : MInst<"svld1ub_vnum_{d}", "dPWl", "silUsUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_ld1">; +def SVLD1SH_VNUM : MInst<"svld1sh_vnum_{d}", "dPTl", "ilUiUl", [IsLoad, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1UH_VNUM : MInst<"svld1uh_vnum_{d}", "dPXl", "ilUiUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_ld1">; +def SVLD1SW_VNUM : MInst<"svld1sw_vnum_{d}", "dPUl", "lUl", [IsLoad, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; +def SVLD1UW_VNUM : MInst<"svld1uw_vnum_{d}", "dPYl", "lUl", [IsLoad, IsZExtReturn, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_ld1">; // Load one vector (vector base) def SVLD1_GATHER_BASES_U : MInst<"svld1_gather[_{2}base]_{d}", "dPu", "ilUiUlfd", [IsGatherLoad], MemEltTyDefault, "aarch64_sve_ld1_gather_scalar_offset">; @@ -487,27 +269,27 @@ } // Load one vector, unextended load, non-temporal (scalar base) -def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1 : MInst<"svldnt1[_{2}]", "dPc", "csilUcUsUiUlhfd", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; // Load one vector, unextended load, non-temporal (scalar base, VL displacement) -def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; +def SVLDNT1_VNUM : MInst<"svldnt1_vnum[_{2}]", "dPcl", "csilUcUsUiUlhfd", [IsLoad, 
IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; - def SVLDNT1_VNUM_BF : MInst<"svldnt1_vnum[_{2}]", "dPcl", "b", [IsLoad], MemEltTyDefault, "aarch64_sve_ldnt1">; + def SVLDNT1_BF : MInst<"svldnt1[_{2}]", "dPc", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; + def SVLDNT1_VNUM_BF : MInst<"svldnt1_vnum[_{2}]", "dPcl", "b", [IsLoad, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_ldnt1">; } // Load one quadword and replicate (scalar base) -def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq">; +def SVLD1RQ : SInst<"svld1rq[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1rq", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq">; + def SVLD1RQ_BF : SInst<"svld1rq[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1rq", [IsStreamingCompatible]>; } multiclass StructLoad { - def : SInst; + def : SInst; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def: SInst; + def: SInst; } } @@ -530,42 +312,42 @@ } let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; - def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>; - def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>; + def SVBFDOT : SInst<"svbfdot[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALB : SInst<"svbfmlalb[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALT : SInst<"svbfmlalt[_{0}]", "MMdd", "b", MergeNone, 
"aarch64_sve_bfmlalt", [IsOverloadNone, IsStreamingCompatible]>; def SVBFMMLA : SInst<"svbfmmla[_{0}]", "MMdd", "b", MergeNone, "aarch64_sve_bfmmla", [IsOverloadNone]>; - def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone]>; - def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone]>; - def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone]>; - def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_3>]>; - def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; - def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFDOT_N : SInst<"svbfdot[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfdot", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLAL_N : SInst<"svbfmlalb[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalb", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFMLALT_N : SInst<"svbfmlalt[_n_{0}]", "MMda", "b", MergeNone, "aarch64_sve_bfmlalt", [IsOverloadNone, IsStreamingCompatible]>; + def SVBFDOT_LANE : SInst<"svbfdot_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfdot_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_3>]>; + def SVBFMLALB_LANE : SInst<"svbfmlalb_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalb_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; + def SVBFMLALT_LANE : SInst<"svbfmlalt_lane[_{0}]", "MMddn", "b", MergeNone, "aarch64_sve_bfmlalt_lane", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<3, ImmCheck0_7>]>; } //////////////////////////////////////////////////////////////////////////////// // Stores // Store one vector (scalar 
base) -def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1 : MInst<"svst1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1B_S : MInst<"svst1b[_{d}]", "vPAd", "sil", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_U : MInst<"svst1b[_{d}]", "vPEd", "UsUiUl", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_S : MInst<"svst1h[_{d}]", "vPBd", "il", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_U : MInst<"svst1h[_{d}]", "vPFd", "UiUl", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_S : MInst<"svst1w[_{d}]", "vPCd", "l", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_U : MInst<"svst1w[_{d}]", "vPGd", "Ul", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; // Store one vector (scalar base, VL displacement) -def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; -def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore], MemEltTyInt8, "aarch64_sve_st1">; -def SVST1H_VNUM_S : 
MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore], MemEltTyInt16, "aarch64_sve_st1">; -def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; -def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1_VNUM : MInst<"svst1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; +def SVST1B_VNUM_S : MInst<"svst1b_vnum[_{d}]", "vPAld", "sil", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1B_VNUM_U : MInst<"svst1b_vnum[_{d}]", "vPEld", "UsUiUl", [IsStore, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_st1">; +def SVST1H_VNUM_S : MInst<"svst1h_vnum[_{d}]", "vPBld", "il", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1H_VNUM_U : MInst<"svst1h_vnum[_{d}]", "vPFld", "UiUl", [IsStore, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_st1">; +def SVST1W_VNUM_S : MInst<"svst1w_vnum[_{d}]", "vPCld", "l", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; +def SVST1W_VNUM_U : MInst<"svst1w_vnum[_{d}]", "vPGld", "Ul", [IsStore, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_st1">; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; - def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_st1">; + def SVST1_BF : MInst<"svst1[_{d}]", "vPpd", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; + def SVST1_VNUM_BF : MInst<"svst1_vnum[_{d}]", "vPpld", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_st1">; } // Store one vector (vector base) @@ -638,9 +420,9 @@ def SVST1W_SCATTER_INDEX_S : 
MInst<"svst1w_scatter[_{2}base]_index[_{d}]", "vPuld", "lUl", [IsScatterStore], MemEltTyInt32, "aarch64_sve_st1_scatter_scalar_offset">; multiclass StructStore { - def : SInst; + def : SInst; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def: SInst; + def: SInst; } } // Store N vectors into N-element structure (scalar base) @@ -654,30 +436,30 @@ defm SVST4_VNUM : StructStore<"svst4_vnum[_{d}]", "vPpl4", "aarch64_sve_st4">; // Store one vector, with no truncation, non-temporal (scalar base) -def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1 : MInst<"svstnt1[_{d}]", "vPpd", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; // Store one vector, with no truncation, non-temporal (scalar base, VL displacement) -def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; +def SVSTNT1_VNUM : MInst<"svstnt1_vnum[_{d}]", "vPpld", "csilUcUsUiUlhfd", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; - def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore], MemEltTyDefault, "aarch64_sve_stnt1">; + def SVSTNT1_BF : MInst<"svstnt1[_{d}]", "vPpd", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; + def SVSTNT1_VNUM_BF : MInst<"svstnt1_vnum[_{d}]", "vPpld", "b", [IsStore, IsStreamingCompatible], MemEltTyDefault, "aarch64_sve_stnt1">; } //////////////////////////////////////////////////////////////////////////////// // Prefetches // Prefetch (Scalar base) -def SVPRFB : MInst<"svprfb", "vPQJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; -def SVPRFH : MInst<"svprfh", "vPQJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; -def SVPRFW : MInst<"svprfw", 
"vPQJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; -def SVPRFD : MInst<"svprfd", "vPQJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +def SVPRFB : MInst<"svprfb", "vPQJ", "c", [IsPrefetch, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH : MInst<"svprfh", "vPQJ", "s", [IsPrefetch, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW : MInst<"svprfw", "vPQJ", "i", [IsPrefetch, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD : MInst<"svprfd", "vPQJ", "l", [IsPrefetch, IsStreamingCompatible], MemEltTyInt64, "aarch64_sve_prf">; // Prefetch (Scalar base, VL displacement) -def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPQlJ", "c", [IsPrefetch], MemEltTyInt8, "aarch64_sve_prf">; -def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch], MemEltTyInt16, "aarch64_sve_prf">; -def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch], MemEltTyInt32, "aarch64_sve_prf">; -def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch], MemEltTyInt64, "aarch64_sve_prf">; +def SVPRFB_VNUM : MInst<"svprfb_vnum", "vPQlJ", "c", [IsPrefetch, IsStreamingCompatible], MemEltTyInt8, "aarch64_sve_prf">; +def SVPRFH_VNUM : MInst<"svprfh_vnum", "vPQlJ", "s", [IsPrefetch, IsStreamingCompatible], MemEltTyInt16, "aarch64_sve_prf">; +def SVPRFW_VNUM : MInst<"svprfw_vnum", "vPQlJ", "i", [IsPrefetch, IsStreamingCompatible], MemEltTyInt32, "aarch64_sve_prf">; +def SVPRFD_VNUM : MInst<"svprfd_vnum", "vPQlJ", "l", [IsPrefetch, IsStreamingCompatible], MemEltTyInt64, "aarch64_sve_prf">; // Prefetch (Vector bases) def SVPRFB_GATHER_BASES : MInst<"svprfb_gather[_{2}base]", "vPdJ", "UiUl", [IsGatherPrefetch], MemEltTyInt8, "aarch64_sve_prfb_gather_scalar_offset">; @@ -732,9 +514,9 @@ def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>; multiclass svdup_base { - def NAME : SInst; + def NAME : SInst; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def _BF16: SInst; + def _BF16: SInst; } } 
@@ -743,14 +525,14 @@ defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs", MergeAnyExp, "aarch64_sve_dup">; defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs", MergeZeroExp, "aarch64_sve_dup">; -def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">; +def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index", [IsStreamingCompatible]>; // Integer arithmetic -multiclass SInstZPZ flags=[]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; +multiclass SInstZPZ { + def _M : SInst; + def _X : SInst; + def _Z : SInst; } defm SVABS : SInstZPZ<"svabs", "csil", "aarch64_sve_abs">; @@ -758,14 +540,14 @@ //------------------------------------------------------------------------------ -multiclass SInstZPZZ flags=[]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; +multiclass SInstZPZZ { + def _M : SInst; + def _X : SInst; + def _Z : SInst; - def _N_M : SInst; - def _N_X : SInst; - def _N_Z : SInst; + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; } defm SVABD_S : SInstZPZZ<"svabd", "csil", "aarch64_sve_sabd">; @@ -787,14 +569,14 @@ //------------------------------------------------------------------------------ -multiclass SInstZPZZZ flags=[]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; +multiclass SInstZPZZZ { + def _M : SInst; + def _X : SInst; + def _Z : SInst; - def _N_M : SInst; - def _N_X : SInst; - def _N_Z : SInst; + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; } defm SVMAD : SInstZPZZZ<"svmad", "csilUcUsUiUl", "aarch64_sve_mad">; @@ -804,19 +586,19 @@ //------------------------------------------------------------------------------ -def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot">; -def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot">; -def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x">; -def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, 
"aarch64_sve_uqadd_x">; -def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x">; -def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; +def SVDOT_S : SInst<"svdot[_{0}]", "ddqq", "il", MergeNone, "aarch64_sve_sdot", [IsStreamingCompatible]>; +def SVDOT_U : SInst<"svdot[_{0}]", "ddqq", "UiUl", MergeNone, "aarch64_sve_udot", [IsStreamingCompatible]>; +def SVQADD_S : SInst<"svqadd[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqadd_x", [IsStreamingCompatible]>; +def SVQADD_U : SInst<"svqadd[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [IsStreamingCompatible]>; +def SVQSUB_S : SInst<"svqsub[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqsub_x", [IsStreamingCompatible]>; +def SVQSUB_U : SInst<"svqsub[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [IsStreamingCompatible]>; -def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot">; -def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot">; -def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x">; -def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x">; -def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x">; -def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x">; +def SVDOT_N_S : SInst<"svdot[_n_{0}]", "ddqr", "il", MergeNone, "aarch64_sve_sdot", [IsStreamingCompatible]>; +def SVDOT_N_U : SInst<"svdot[_n_{0}]", "ddqr", "UiUl", MergeNone, "aarch64_sve_udot", [IsStreamingCompatible]>; +def SVQADD_N_S : SInst<"svqadd[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqadd_x", [IsStreamingCompatible]>; +def SVQADD_N_U : SInst<"svqadd[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqadd_x", [IsStreamingCompatible]>; +def SVQSUB_N_S : SInst<"svqsub[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqsub_x", 
[IsStreamingCompatible]>; +def SVQSUB_N_U : SInst<"svqsub[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_uqsub_x", [IsStreamingCompatible]>; def SVDOT_LANE_S : SInst<"svdot_lane[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_sdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; def SVDOT_LANE_U : SInst<"svdot_lane[_{d}]", "ddqqi", "UiUl", MergeNone, "aarch64_sve_udot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; @@ -836,107 +618,107 @@ // Shifts multiclass SInst_SHIFT { - def _M : SInst; - def _X : SInst; - def _Z : SInst; + def _M : SInst; + def _X : SInst; + def _Z : SInst; - def _N_M : SInst; - def _N_X : SInst; - def _N_Z : SInst; + def _N_M : SInst; + def _N_X : SInst; + def _N_Z : SInst; - def _WIDE_M : SInst; - def _WIDE_X : SInst; - def _WIDE_Z : SInst; + def _WIDE_M : SInst; + def _WIDE_X : SInst; + def _WIDE_Z : SInst; - def _WIDE_N_M : SInst; - def _WIDE_N_X : SInst; - def _WIDE_N_Z : SInst; + def _WIDE_N_M : SInst; + def _WIDE_N_X : SInst; + def _WIDE_N_Z : SInst; } defm SVASR : SInst_SHIFT<"svasr", "aarch64_sve_asr", "csil", "csi">; defm SVLSL : SInst_SHIFT<"svlsl", "aarch64_sve_lsl", "csilUcUsUiUl", "csiUcUsUi">; defm SVLSR : SInst_SHIFT<"svlsr", "aarch64_sve_lsr", "UcUsUiUl", "UcUsUi">; -def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, 
"aarch64_sve_asrd", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">; +def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">; + def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // Integer reductions -def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv">; -def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv">; -def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv">; -def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv">; -def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_smaxv">; -def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv">; -def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv">; -def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv">; -def SVORV : SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv">; +def SVADDV_S : SInst<"svaddv[_{d}]", "lPd", "csil", MergeNone, "aarch64_sve_saddv", [IsStreamingCompatible]>; +def SVADDV_U : SInst<"svaddv[_{d}]", "nPd", "UcUsUiUl", MergeNone, "aarch64_sve_uaddv", [IsStreamingCompatible]>; +def SVANDV : SInst<"svandv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_andv", [IsStreamingCompatible]>; +def SVEORV : SInst<"sveorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorv", [IsStreamingCompatible]>; +def SVMAXV_S : SInst<"svmaxv[_{d}]", "sPd", "csil", MergeNone, 
"aarch64_sve_smaxv", [IsStreamingCompatible]>; +def SVMAXV_U : SInst<"svmaxv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_umaxv", [IsStreamingCompatible]>; +def SVMINV_S : SInst<"svminv[_{d}]", "sPd", "csil", MergeNone, "aarch64_sve_sminv", [IsStreamingCompatible]>; +def SVMINV_U : SInst<"svminv[_{d}]", "sPd", "UcUsUiUl", MergeNone, "aarch64_sve_uminv", [IsStreamingCompatible]>; +def SVORV : SInst<"svorv[_{d}]", "sPd", "csilUcUsUiUl", MergeNone, "aarch64_sve_orv", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Integer comparisons -def SVCMPEQ : SInst<"svcmpeq[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">; -def SVCMPNE : SInst<"svcmpne[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">; -def SVCMPGE : SInst<"svcmpge[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge">; -def SVCMPGT : SInst<"svcmpgt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt">; -def SVCMPLE : SInst<"svcmple[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>; -def SVCMPLT : SInst<"svcmplt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>; -def SVCMPHI : SInst<"svcmpgt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">; -def SVCMPHS : SInst<"svcmpge[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">; -def SVCMPLO : SInst<"svcmplt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>; -def SVCMPLS : SInst<"svcmple[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>; - -def SVCMPEQ_N : SInst<"svcmpeq[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq">; -def SVCMPNE_N : SInst<"svcmpne[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne">; -def SVCMPGE_N : SInst<"svcmpge[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge">; -def SVCMPGT_N : SInst<"svcmpgt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt">; -def SVCMPLE_N : 
SInst<"svcmple[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare]>; -def SVCMPLT_N : SInst<"svcmplt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare]>; -def SVCMPHS_N : SInst<"svcmpge[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs">; -def SVCMPHI_N : SInst<"svcmpgt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi">; -def SVCMPLS_N : SInst<"svcmple[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare]>; -def SVCMPLO_N : SInst<"svcmplt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare]>; - -def SVCMPEQ_WIDE : SInst<"svcmpeq_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpeq_wide">; -def SVCMPNE_WIDE : SInst<"svcmpne_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpne_wide">; -def SVCMPGE_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpge_wide">; -def SVCMPGT_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpgt_wide">; -def SVCMPLE_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmple_wide">; -def SVCMPLT_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmplt_wide">; -def SVCMPHI_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">; -def SVCMPHS_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">; -def SVCMPLO_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">; -def SVCMPLS_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">; - -def SVCMPEQ_WIDE_N : SInst<"svcmpeq_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpeq_wide">; -def SVCMPNE_WIDE_N : SInst<"svcmpne_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpne_wide">; -def SVCMPGE_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpge_wide">; -def SVCMPGT_WIDE_N : 
SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpgt_wide">; -def SVCMPLE_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmple_wide">; -def SVCMPLT_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmplt_wide">; -def SVCMPHS_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide">; -def SVCMPHI_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide">; -def SVCMPLO_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide">; -def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide">; +def SVCMPEQ : SInst<"svcmpeq[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq", [IsStreamingCompatible]>; +def SVCMPNE : SInst<"svcmpne[_{d}]", "PPdd", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne", [IsStreamingCompatible]>; +def SVCMPGE : SInst<"svcmpge[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [IsStreamingCompatible]>; +def SVCMPGT : SInst<"svcmpgt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [IsStreamingCompatible]>; +def SVCMPLE : SInst<"svcmple[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT : SInst<"svcmplt[_{d}]", "PPdd", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPHI : SInst<"svcmpgt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [IsStreamingCompatible]>; +def SVCMPHS : SInst<"svcmpge[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [IsStreamingCompatible]>; +def SVCMPLO : SInst<"svcmplt[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLS : SInst<"svcmple[_{d}]", "PPdd", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare, IsStreamingCompatible]>; + +def SVCMPEQ_N : SInst<"svcmpeq[_n_{d}]", "PPda", 
"csilUcUsUiUl", MergeNone, "aarch64_sve_cmpeq", [IsStreamingCompatible]>; +def SVCMPNE_N : SInst<"svcmpne[_n_{d}]", "PPda", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmpne", [IsStreamingCompatible]>; +def SVCMPGE_N : SInst<"svcmpge[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [IsStreamingCompatible]>; +def SVCMPGT_N : SInst<"svcmpgt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [IsStreamingCompatible]>; +def SVCMPLE_N : SInst<"svcmple[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_N : SInst<"svcmplt[_n_{d}]", "PPda", "csil", MergeNone, "aarch64_sve_cmpgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPHS_N : SInst<"svcmpge[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [IsStreamingCompatible]>; +def SVCMPHI_N : SInst<"svcmpgt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [IsStreamingCompatible]>; +def SVCMPLS_N : SInst<"svcmple[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphs", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLO_N : SInst<"svcmplt[_n_{d}]", "PPda", "UcUsUiUl", MergeNone, "aarch64_sve_cmphi", [ReverseCompare, IsStreamingCompatible]>; + +def SVCMPEQ_WIDE : SInst<"svcmpeq_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpeq_wide", [IsStreamingCompatible]>; +def SVCMPNE_WIDE : SInst<"svcmpne_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpne_wide", [IsStreamingCompatible]>; +def SVCMPGE_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpge_wide", [IsStreamingCompatible]>; +def SVCMPGT_WIDE : SInst<"svcmpgt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmpgt_wide", [IsStreamingCompatible]>; +def SVCMPLE_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmple_wide", [IsStreamingCompatible]>; +def SVCMPLT_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "csi", MergeNone, "aarch64_sve_cmplt_wide", [IsStreamingCompatible]>; +def SVCMPHI_WIDE : 
SInst<"svcmpgt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide", [IsStreamingCompatible]>; +def SVCMPHS_WIDE : SInst<"svcmpge_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide", [IsStreamingCompatible]>; +def SVCMPLO_WIDE : SInst<"svcmplt_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide", [IsStreamingCompatible]>; +def SVCMPLS_WIDE : SInst<"svcmple_wide[_{d}]", "PPdw", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide", [IsStreamingCompatible]>; + +def SVCMPEQ_WIDE_N : SInst<"svcmpeq_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpeq_wide", [IsStreamingCompatible]>; +def SVCMPNE_WIDE_N : SInst<"svcmpne_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpne_wide", [IsStreamingCompatible]>; +def SVCMPGE_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpge_wide", [IsStreamingCompatible]>; +def SVCMPGT_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmpgt_wide", [IsStreamingCompatible]>; +def SVCMPLE_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmple_wide", [IsStreamingCompatible]>; +def SVCMPLT_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "csi", MergeNone, "aarch64_sve_cmplt_wide", [IsStreamingCompatible]>; +def SVCMPHS_WIDE_N : SInst<"svcmpge_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphs_wide", [IsStreamingCompatible]>; +def SVCMPHI_WIDE_N : SInst<"svcmpgt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmphi_wide", [IsStreamingCompatible]>; +def SVCMPLO_WIDE_N : SInst<"svcmplt_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmplo_wide", [IsStreamingCompatible]>; +def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, "aarch64_sve_cmpls_wide", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // While comparisons -def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, 
"aarch64_sve_whilele", [IsOverloadWhile]>; -def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile]>; -def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; -def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile]>; -def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; -def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile]>; -def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; -def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile]>; +def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELT_S64 : 
SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Counting bit @@ -947,12 +729,12 @@ def _Z : SInst; } -defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; -defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">; -defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; +defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls", [IsStreamingCompatible]>; +defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz", [IsStreamingCompatible]>; +defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt">; + defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1007,13 +789,13 @@ def SVTSMUL : SInst<"svtsmul[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftsmul_x">; def SVTSSEL : SInst<"svtssel[_{d}]", "ddu", "hfd", MergeNone, "aarch64_sve_ftssel_x">; -def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale">; -def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale">; -def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale">; +def SVSCALE_M : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeOp1, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_X : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeAny, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_Z : SInst<"svscale[_{d}]", "dPdx", "hfd", MergeZero, "aarch64_sve_fscale", [IsStreamingCompatible]>; -def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale">; -def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, 
"aarch64_sve_fscale">; -def SVSCALE_N_Z : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeZero, "aarch64_sve_fscale">; +def SVSCALE_N_M : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeOp1, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_N_X : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeAny, "aarch64_sve_fscale", [IsStreamingCompatible]>; +def SVSCALE_N_Z : SInst<"svscale[_n_{d}]", "dPdK", "hfd", MergeZero, "aarch64_sve_fscale", [IsStreamingCompatible]>; defm SVMAD_F : SInstZPZZZ<"svmad", "hfd", "aarch64_sve_fmad">; defm SVMLA_F : SInstZPZZZ<"svmla", "hfd", "aarch64_sve_fmla">; @@ -1024,42 +806,42 @@ defm SVNMLS_F : SInstZPZZZ<"svnmls", "hfd", "aarch64_sve_fnmls">; defm SVNMSB_F : SInstZPZZZ<"svnmsb", "hfd", "aarch64_sve_fnmsb">; -def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCADD_X : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeAny, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCADD_Z : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeZero, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>; -def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_X : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeAny, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def SVCADD_Z : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeZero, "aarch64_sve_fcadd", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRot90_270>]>; +def 
SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_X : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeAny, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; +def SVCMLA_Z : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeZero, "aarch64_sve_fcmla", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE : SInst<"svmls_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmls_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE : SInst<"svmul_lane[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_fmul_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVRECPE : SInst<"svrecpe[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frecpe_x">; -def SVRECPS : SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frecps_x">; -def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x">; -def SVRSQRTS : 
SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x">; +def SVRECPE : SInst<"svrecpe[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frecpe_x", [IsStreamingCompatible]>; +def SVRECPS : SInst<"svrecps[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frecps_x", [IsStreamingCompatible]>; +def SVRSQRTE : SInst<"svrsqrte[_{d}]", "dd", "hfd", MergeNone, "aarch64_sve_frsqrte_x", [IsStreamingCompatible]>; +def SVRSQRTS : SInst<"svrsqrts[_{d}]", "ddd", "hfd", MergeNone, "aarch64_sve_frsqrts_x", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point reductions def SVFADDA : SInst<"svadda[_{d}]", "sPsd", "hfd", MergeNone, "aarch64_sve_fadda">; -def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv">; -def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv">; -def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv">; -def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv">; -def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv">; +def SVFADDV : SInst<"svaddv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_faddv", [IsStreamingCompatible]>; +def SVFMAXV : SInst<"svmaxv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxv", [IsStreamingCompatible]>; +def SVFMAXNMV : SInst<"svmaxnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fmaxnmv", [IsStreamingCompatible]>; +def SVFMINV : SInst<"svminv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminv", [IsStreamingCompatible]>; +def SVFMINNMV : SInst<"svminnmv[_{d}]", "sPd", "hfd", MergeNone, "aarch64_sve_fminnmv", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point comparisons -def SVACGE : SInst<"svacge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge">; -def SVACGT : SInst<"svacgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt">; -def 
SVACLE : SInst<"svacle[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare]>; -def SVACLT : SInst<"svaclt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>; -def SVCMPUO : SInst<"svcmpuo[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpuo">; +def SVACGE : SInst<"svacge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [IsStreamingCompatible]>; +def SVACGT : SInst<"svacgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [IsStreamingCompatible]>; +def SVACLE : SInst<"svacle[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facge", [ReverseCompare, IsStreamingCompatible]>; +def SVACLT : SInst<"svaclt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPUO : SInst<"svcmpuo[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpuo", [IsStreamingCompatible]>; def SVACGE_N : SInst<"svacge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facge">; def SVACGT_N : SInst<"svacgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt">; @@ -1067,19 +849,19 @@ def SVACLT_N : SInst<"svaclt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_facgt", [ReverseCompare]>; def SVCMPUO_N : SInst<"svcmpuo[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpuo">; -def SVCMPEQ_F : SInst<"svcmpeq[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpeq">; -def SVCMPNE_F : SInst<"svcmpne[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpne">; -def SVCMPGE_F : SInst<"svcmpge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge">; -def SVCMPGT_F : SInst<"svcmpgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt">; -def SVCMPLE_F : SInst<"svcmple[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>; -def SVCMPLT_F : SInst<"svcmplt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>; +def SVCMPEQ_F : SInst<"svcmpeq[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpeq", [IsStreamingCompatible]>; +def SVCMPNE_F : SInst<"svcmpne[_{d}]", "PPdd", "hfd", 
MergeNone, "aarch64_sve_fcmpne", [IsStreamingCompatible]>; +def SVCMPGE_F : SInst<"svcmpge[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [IsStreamingCompatible]>; +def SVCMPGT_F : SInst<"svcmpgt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [IsStreamingCompatible]>; +def SVCMPLE_F : SInst<"svcmple[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_F : SInst<"svcmplt[_{d}]", "PPdd", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare, IsStreamingCompatible]>; -def SVCMPEQ_F_N : SInst<"svcmpeq[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpeq">; -def SVCMPNE_F_N : SInst<"svcmpne[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpne">; -def SVCMPGE_F_N : SInst<"svcmpge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge">; -def SVCMPGT_F_N : SInst<"svcmpgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt">; -def SVCMPLE_F_N : SInst<"svcmple[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare]>; -def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare]>; +def SVCMPEQ_F_N : SInst<"svcmpeq[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpeq", [IsStreamingCompatible]>; +def SVCMPNE_F_N : SInst<"svcmpne[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpne", [IsStreamingCompatible]>; +def SVCMPGE_F_N : SInst<"svcmpge[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [IsStreamingCompatible]>; +def SVCMPGT_F_N : SInst<"svcmpgt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [IsStreamingCompatible]>; +def SVCMPLE_F_N : SInst<"svcmple[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpge", [ReverseCompare, IsStreamingCompatible]>; +def SVCMPLT_F_N : SInst<"svcmplt[_n_{d}]", "PPda", "hfd", MergeNone, "aarch64_sve_fcmpgt", [ReverseCompare, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Floating-point conversions @@ 
-1087,16 +869,16 @@ multiclass SInstCvtMXZ< string name, string m_types, string xz_types, string types, string intrinsic, list flags = [IsOverloadNone]> { - def _M : SInst; - def _X : SInst; - def _Z : SInst; + def _M : SInst; + def _X : SInst; + def _Z : SInst; } multiclass SInstCvtMX flags = [IsOverloadNone]> { - def _M : SInst; - def _X : SInst; + def _M : SInst; + def _X : SInst; } // svcvt_s##_f16 @@ -1110,7 +892,7 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { defm SVCVT_BF16_F32 : SInstCvtMXZ<"svcvt_bf16[_f32]", "ddPM", "dPM", "b", "aarch64_sve_fcvt_bf16f32">; - def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone]>; + def SVCVTNT_BF16_F32 : SInst<"svcvtnt_bf16[_f32]", "ddPM", "b", MergeOp1, "aarch64_sve_fcvtnt_bf16f32", [IsOverloadNone, IsStreamingCompatible]>; } // svcvt_s##_f64 @@ -1174,11 +956,11 @@ defm SVCVTX_F32 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">; -def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone]>; -def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone]>; +def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone, IsStreamingCompatible]>; +def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone, IsStreamingCompatible]>; // SVCVTNT_X : Implemented as macro by SveEmitter.cpp -def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone]>; +def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone, IsStreamingCompatible]>; // SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp } @@ -1187,9 +969,9 @@ // Permutations and selection multiclass SVEPerm { - def : SInst; + def : SInst; let 
ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def: SInst; + def: SInst; } } @@ -1213,60 +995,60 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">; } -def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [IsStreamingCompatible], [ImmCheck<2, ImmCheckExtract, 1>]>; defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">; defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">; -def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">; -def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; -def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; -def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice", [IsStreamingCompatible]>; +def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { - def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">; + def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl", [IsStreamingCompatible]>; } -def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; -def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; -def SVUNPKHI_S : 
SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; -def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi">; -def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo">; -def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo">; -def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1">; -def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2">; +def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi", [IsStreamingCompatible]>; +def SVUNPKHI_U : SInst<"svunpkhi[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpkhi", [IsStreamingCompatible]>; +def SVUNPKLO_S : SInst<"svunpklo[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpklo", [IsStreamingCompatible]>; +def SVUNPKLO_U : SInst<"svunpklo[_{d}]", "dh", "UsUiUl", MergeNone, "aarch64_sve_uunpklo", [IsStreamingCompatible]>; +def SVUZP1 : SInst<"svuzp1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP2 : SInst<"svuzp2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVZIP1 : SInst<"svzip1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP2 : SInst<"svzip2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { -def SVEXT_BF16 : SInst<"svext[_{d}]", 
"dddi", "b", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>; -def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev">; -def SVSEL_BF16 : SInst<"svsel[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_sel">; -def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice">; -def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1">; -def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2">; -def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1">; -def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2">; -} - -def SVREV_B : SInst<"svrev_{d}", "PP", "PcPsPiPl", MergeNone, "aarch64_sve_rev">; -def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel">; -def SVTRN1_B : SInst<"svtrn1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn1">; -def SVTRN2_B : SInst<"svtrn2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn2">; -def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi">; -def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo">; -def SVUZP1_B : SInst<"svuzp1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp1">; -def SVUZP2_B : SInst<"svuzp2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp2">; -def SVZIP1_B : SInst<"svzip1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip1">; -def SVZIP2_B : SInst<"svzip2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip2">; +def SVEXT_BF16 : SInst<"svext[_{d}]", "dddi", "b", MergeNone, "aarch64_sve_ext", [IsStreamingCompatible], [ImmCheck<2, ImmCheckExtract, 1>]>; +def SVREV_BF16 : SInst<"svrev[_{d}]", "dd", "b", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVSEL_BF16 : SInst<"svsel[_{d}]", 
"dPdd", "b", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVSPLICE_BF16 : SInst<"svsplice[_{d}]", "dPdd", "b", MergeNone, "aarch64_sve_splice", [IsStreamingCompatible]>; +def SVTRN1_BF16 : SInst<"svtrn1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN2_BF16 : SInst<"svtrn2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVUZP1_BF16 : SInst<"svuzp1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP2_BF16 : SInst<"svuzp2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVZIP1_BF16 : SInst<"svzip1[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP2_BF16 : SInst<"svzip2[_{d}]", "ddd", "b", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; +} + +def SVREV_B : SInst<"svrev_{d}", "PP", "PcPsPiPl", MergeNone, "aarch64_sve_rev", [IsStreamingCompatible]>; +def SVSEL_B : SInst<"svsel[_b]", "PPPP", "Pc", MergeNone, "aarch64_sve_sel", [IsStreamingCompatible]>; +def SVTRN1_B : SInst<"svtrn1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn1", [IsStreamingCompatible]>; +def SVTRN2_B : SInst<"svtrn2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_trn2", [IsStreamingCompatible]>; +def SVPUNPKHI : SInst<"svunpkhi[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpkhi", [IsStreamingCompatible]>; +def SVPUNPKLO : SInst<"svunpklo[_b]", "PP", "Pc", MergeNone, "aarch64_sve_punpklo", [IsStreamingCompatible]>; +def SVUZP1_B : SInst<"svuzp1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp1", [IsStreamingCompatible]>; +def SVUZP2_B : SInst<"svuzp2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_uzp2", [IsStreamingCompatible]>; +def SVZIP1_B : SInst<"svzip1_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip1", [IsStreamingCompatible]>; +def SVZIP2_B : SInst<"svzip2_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_zip2", [IsStreamingCompatible]>; 
//////////////////////////////////////////////////////////////////////////////// // Predicate creation -def SVPFALSE : SInst<"svpfalse[_b]", "Pv", "", MergeNone, "", [IsOverloadNone]>; +def SVPFALSE : SInst<"svpfalse[_b]", "Pv", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; -def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue">; -def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsAppendSVALL]>; +def SVPTRUE_PAT : SInst<"svptrue_pat_{d}", "PI", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsStreamingCompatible]>; +def SVPTRUE : SInst<"svptrue_{d}", "Pv", "PcPsPiPl", MergeNone, "aarch64_sve_ptrue", [IsStreamingCompatible, IsAppendSVALL]>; def SVDUPQ_B8 : SInst<"svdupq[_n]_{d}", "Pssssssssssssssss", "Pc", MergeNone>; def SVDUPQ_B16 : SInst<"svdupq[_n]_{d}", "Pssssssss", "Ps", MergeNone>; @@ -1278,33 +1060,33 @@ //////////////////////////////////////////////////////////////////////////////// // Predicate operations -def SVAND_B_Z : SInst<"svand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_and_z">; -def SVBIC_B_Z : SInst<"svbic[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_bic_z">; -def SVEOR_B_Z : SInst<"sveor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_eor_z">; -def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion -def SVNAND_B_Z : SInst<"svnand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nand_z">; -def SVNOR_B_Z : SInst<"svnor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nor_z">; -def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone>; // Uses custom expansion -def SVORN_B_Z : SInst<"svorn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orn_z">; -def SVORR_B_Z : SInst<"svorr[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orr_z">; - -def SVBRKA : SInst<"svbrka[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brka">; -def SVBRKA_Z : SInst<"svbrka[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brka_z">; -def SVBRKB : SInst<"svbrkb[_b]_m", 
"PPPP", "Pc", MergeNone, "aarch64_sve_brkb">; -def SVBRKB_Z : SInst<"svbrkb[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brkb_z">; -def SVBRKN_Z : SInst<"svbrkn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkn_z">; -def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpa_z">; -def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z">; - -def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst">; -def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext">; +def SVAND_B_Z : SInst<"svand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_and_z", [IsStreamingCompatible]>; +def SVBIC_B_Z : SInst<"svbic[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_bic_z", [IsStreamingCompatible]>; +def SVEOR_B_Z : SInst<"sveor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_eor_z", [IsStreamingCompatible]>; +def SVMOV_B_Z : SInst<"svmov[_b]_z", "PPP", "Pc", MergeNone, "", [IsStreamingCompatible]>; // Uses custom expansion +def SVNAND_B_Z : SInst<"svnand[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nand_z", [IsStreamingCompatible]>; +def SVNOR_B_Z : SInst<"svnor[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_nor_z", [IsStreamingCompatible]>; +def SVNOT_B_Z : SInst<"svnot[_b]_z", "PPP", "Pc", MergeNone, "", [IsStreamingCompatible]>; // Uses custom expansion +def SVORN_B_Z : SInst<"svorn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orn_z", [IsStreamingCompatible]>; +def SVORR_B_Z : SInst<"svorr[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_orr_z", [IsStreamingCompatible]>; + +def SVBRKA : SInst<"svbrka[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brka", [IsStreamingCompatible]>; +def SVBRKA_Z : SInst<"svbrka[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brka_z", [IsStreamingCompatible]>; +def SVBRKB : SInst<"svbrkb[_b]_m", "PPPP", "Pc", MergeNone, "aarch64_sve_brkb", [IsStreamingCompatible]>; +def SVBRKB_Z : SInst<"svbrkb[_b]_z", "PPP", "Pc", MergeNone, "aarch64_sve_brkb_z", 
[IsStreamingCompatible]>; +def SVBRKN_Z : SInst<"svbrkn[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkn_z", [IsStreamingCompatible]>; +def SVBRKPA_Z : SInst<"svbrkpa[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpa_z", [IsStreamingCompatible]>; +def SVBRKPB_Z : SInst<"svbrkpb[_b]_z", "PPPP", "Pc", MergeNone, "aarch64_sve_brkpb_z", [IsStreamingCompatible]>; + +def SVPFIRST : SInst<"svpfirst[_b]", "PPP", "Pc", MergeNone, "aarch64_sve_pfirst", [IsStreamingCompatible]>; +def SVPNEXT : SInst<"svpnext_{d}", "PPP", "PcPsPiPl", MergeNone, "aarch64_sve_pnext", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Testing predicates -def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_any">; -def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first">; -def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last">; +def SVPTEST_ANY : SInst<"svptest_any", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_any", [IsStreamingCompatible]>; +def SVPTEST_FIRST : SInst<"svptest_first", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_first", [IsStreamingCompatible]>; +def SVPTEST_LAST : SInst<"svptest_last", "sPP", "Pc", MergeNone, "aarch64_sve_ptest_last", [IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // FFR manipulation @@ -1317,21 +1099,21 @@ //////////////////////////////////////////////////////////////////////////////// // Counting elements -def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", MergeNone, "aarch64_sve_cntb", [IsOverloadNone]>; -def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone]>; -def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone]>; -def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone]>; +def SVCNTB_PAT : SInst<"svcntb_pat", "nI", "", 
MergeNone, "aarch64_sve_cntb", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTH_PAT : SInst<"svcnth_pat", "nI", "", MergeNone, "aarch64_sve_cnth", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTW_PAT : SInst<"svcntw_pat", "nI", "", MergeNone, "aarch64_sve_cntw", [IsOverloadNone, IsStreamingCompatible]>; +def SVCNTD_PAT : SInst<"svcntd_pat", "nI", "", MergeNone, "aarch64_sve_cntd", [IsOverloadNone, IsStreamingCompatible]>; -def SVCNTB : SInst<"svcntb", "nv", "", MergeNone, "aarch64_sve_cntb", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTH : SInst<"svcnth", "nv", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone]>; -def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone]>; +def SVCNTB : SInst<"svcntb", "nv", "", MergeNone, "aarch64_sve_cntb", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTH : SInst<"svcnth", "nv", "", MergeNone, "aarch64_sve_cnth", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTW : SInst<"svcntw", "nv", "", MergeNone, "aarch64_sve_cntw", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; +def SVCNTD : SInst<"svcntd", "nv", "", MergeNone, "aarch64_sve_cntd", [IsAppendSVALL, IsOverloadNone, IsStreamingCompatible]>; -def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp">; -def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone>; +def SVCNTP : SInst<"svcntp_{d}", "nPP", "PcPsPiPl", MergeNone, "aarch64_sve_cntp", [IsStreamingCompatible]>; +def SVLEN : SInst<"svlen[_{d}]", "nd", "csilUcUsUiUlhfd", MergeNone, "", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { -def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone>; +def SVLEN_BF16 : SInst<"svlen[_{d}]", "nd", "b", MergeNone, "", [IsStreamingCompatible]>; } 
//////////////////////////////////////////////////////////////////////////////// @@ -1348,20 +1130,20 @@ def UnsignedDoubleWord : sat_type<"U", "Ul">; multiclass SInst_SAT1 { - def _N32 : SInst]>; - def _N64 : SInst]>; - def _N32_ALL : SInst]>; - def _N64_ALL : SInst]>; + def _N32 : SInst]>; + def _N64 : SInst]>; + def _N32_ALL : SInst]>; + def _N64_ALL : SInst]>; } multiclass SInst_SAT2 { - def "" : SInst]>; - def _ALL : SInst]>; + def "" : SInst]>; + def _ALL : SInst]>; - def _N32 : SInst]>; - def _N64 : SInst]>; - def _N32_ALL : SInst]>; - def _N64_ALL : SInst]>; + def _N32 : SInst]>; + def _N64 : SInst]>; + def _N32_ALL : SInst]>; + def _N64_ALL : SInst]>; } defm SVQDECB_S : SInst_SAT1<"svqdecb", "aarch64_sve_sqdecb", SignedByte>; @@ -1382,32 +1164,32 @@ defm SVQINCD_S : SInst_SAT2<"svqincd", "aarch64_sve_sqincd", SignedDoubleWord>; defm SVQINCD_U : SInst_SAT2<"svqincd", "aarch64_sve_uqincd", UnsignedDoubleWord>; -def SVQDECP_S : SInst<"svqdecp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqdecp">; -def SVQDECP_U : SInst<"svqdecp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqdecp">; -def SVQINCP_S : SInst<"svqincp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqincp">; -def SVQINCP_U : SInst<"svqincp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqincp">; +def SVQDECP_S : SInst<"svqdecp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqdecp", [IsStreamingCompatible]>; +def SVQDECP_U : SInst<"svqdecp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqdecp", [IsStreamingCompatible]>; +def SVQINCP_S : SInst<"svqincp[_{d}]", "ddP", "sil", MergeNone, "aarch64_sve_sqincp", [IsStreamingCompatible]>; +def SVQINCP_U : SInst<"svqincp[_{d}]", "ddP", "UsUiUl", MergeNone, "aarch64_sve_uqincp", [IsStreamingCompatible]>; -def SVQDECP_N_S32 : SInst<"svqdecp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n32">; -def SVQDECP_N_S64 : SInst<"svqdecp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n64">; -def SVQDECP_N_U32 : 
SInst<"svqdecp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n32">; -def SVQDECP_N_U64 : SInst<"svqdecp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n64">; -def SVQINCP_N_S32 : SInst<"svqincp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n32">; -def SVQINCP_N_S64 : SInst<"svqincp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n64">; -def SVQINCP_N_U32 : SInst<"svqincp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n32">; -def SVQINCP_N_U64 : SInst<"svqincp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n64">; +def SVQDECP_N_S32 : SInst<"svqdecp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n32", [IsStreamingCompatible]>; +def SVQDECP_N_S64 : SInst<"svqdecp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqdecp_n64", [IsStreamingCompatible]>; +def SVQDECP_N_U32 : SInst<"svqdecp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n32", [IsStreamingCompatible]>; +def SVQDECP_N_U64 : SInst<"svqdecp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqdecp_n64", [IsStreamingCompatible]>; +def SVQINCP_N_S32 : SInst<"svqincp[_n_s32]_{d}", "kkP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n32", [IsStreamingCompatible]>; +def SVQINCP_N_S64 : SInst<"svqincp[_n_s64]_{d}", "llP", "PcPsPiPl", MergeNone, "aarch64_sve_sqincp_n64", [IsStreamingCompatible]>; +def SVQINCP_N_U32 : SInst<"svqincp[_n_u32]_{d}", "mmP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n32", [IsStreamingCompatible]>; +def SVQINCP_N_U64 : SInst<"svqincp[_n_u64]_{d}", "nnP", "PcPsPiPl", MergeNone, "aarch64_sve_uqincp_n64", [IsStreamingCompatible]>; let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_INT8)" in { def SVMLLA_S32 : SInst<"svmmla[_s32]", "ddqq","i", MergeNone, "aarch64_sve_smmla">; def SVMLLA_U32 : SInst<"svmmla[_u32]", "ddqq","Ui", MergeNone, "aarch64_sve_ummla">; def SVUSMLLA_S32 : SInst<"svusmmla[_s32]", "ddbq","i", MergeNone, "aarch64_sve_usmmla">; 
-def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot">; -def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot">; -def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>; -def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT]>; +def SVUSDOT_S : SInst<"svusdot[_s32]", "ddbq", "i", MergeNone, "aarch64_sve_usdot", [IsStreamingCompatible]>; +def SVUSDOT_N_S : SInst<"svusdot[_n_s32]", "ddbr", "i", MergeNone, "aarch64_sve_usdot", [IsStreamingCompatible]>; +def SVSUDOT_S : SInst<"svsudot[_s32]", "ddqb", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, IsStreamingCompatible]>; +def SVSUDOT_N_S : SInst<"svsudot[_n_s32]", "ddq@", "i", MergeNone, "aarch64_sve_usdot", [ReverseUSDOT, IsStreamingCompatible]>; -def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; -def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVUSDOT_LANE_S : SInst<"svusdot_lane[_s32]", "ddbqi", "i", MergeNone, "aarch64_sve_usdot_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; +def SVSUDOT_LANE_S : SInst<"svsudot_lane[_s32]", "ddqbi", "i", MergeNone, "aarch64_sve_sudot_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP32)" in { @@ -1416,12 +1198,12 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { def SVMLLA_F64 : SInst<"svmmla[_f64]", "dddd","d", MergeNone, "aarch64_sve_fmmla">; -def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q">; -def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q">; -def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, 
"aarch64_sve_uzp1q">; -def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q">; -def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q">; -def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q">; +def SVTRN1Q : SInst<"svtrn1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1q", [IsStreamingCompatible]>; +def SVTRN2Q : SInst<"svtrn2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2q", [IsStreamingCompatible]>; +def SVUZP1Q : SInst<"svuzp1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp1q", [IsStreamingCompatible]>; +def SVUZP2Q : SInst<"svuzp2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_uzp2q", [IsStreamingCompatible]>; +def SVZIP1Q : SInst<"svzip1q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip1q", [IsStreamingCompatible]>; +def SVZIP2Q : SInst<"svzip2q[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_zip2q", [IsStreamingCompatible]>; } let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64) && defined(__ARM_FEATURE_SVE_BF16)" in { @@ -1478,14 +1260,14 @@ //////////////////////////////////////////////////////////////////////////////// // SVE2 WhileGE/GT let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile]>; -def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile]>; -def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile]>; -def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile]>; -def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile]>; -def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", 
"PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile]>; -def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; -def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile]>; +def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1527,49 +1309,49 @@ } let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl">; -defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl">; -defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl">; -defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", 
"aarch64_sve_uqshl">; -defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl">; -defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl">; -defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd">; -defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd">; - -def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba">; -def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba">; -def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh">; -def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh">; -def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah">; -def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; - -def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba">; -def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba">; -def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqdmulh">; -def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh">; -def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah">; -def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh">; - -def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQRDMLAH_LANE : SInst<"svqrdmlah_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlah_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQRDMLSH_LANE : SInst<"svqrdmlsh_lane[_{d}]", "ddddi", "sil", 
MergeNone, "aarch64_sve_sqrdmlsh_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; - -def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVRSHR_M_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_M_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeOp1, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_X_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_X_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeAny, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_Z_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_srshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSHR_Z_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeZero, "aarch64_sve_urshr", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSRA_S : SInst<"svrsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_srsra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVRSRA_U : SInst<"svrsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_ursra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSLI : SInst<"svsli[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sli", [], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; -def SVSRA_S : SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_ssra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSRA_U : SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; -def SVSRI : SInst<"svsri[_n_{d}]", "dddi", 
"csilUcUsUiUl", MergeNone, "aarch64_sve_sri", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +defm SVQRSHL_S : SInstZPZxZ<"svqrshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqrshl", [IsStreamingCompatible]>; +defm SVQRSHL_U : SInstZPZxZ<"svqrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqrshl", [IsStreamingCompatible]>; +defm SVQSHL_S : SInstZPZxZ<"svqshl", "csil", "dPdx", "dPdK", "aarch64_sve_sqshl", [IsStreamingCompatible]>; +defm SVQSHL_U : SInstZPZxZ<"svqshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_uqshl", [IsStreamingCompatible]>; +defm SVRSHL_S : SInstZPZxZ<"svrshl", "csil", "dPdx", "dPdK", "aarch64_sve_srshl", [IsStreamingCompatible]>; +defm SVRSHL_U : SInstZPZxZ<"svrshl", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_urshl", [IsStreamingCompatible]>; +defm SVSQADD : SInstZPZxZ<"svsqadd", "UcUsUiUl", "dPdx", "dPdK", "aarch64_sve_usqadd", [IsStreamingCompatible]>; +defm SVUQADD : SInstZPZxZ<"svuqadd", "csil", "dPdu", "dPdL", "aarch64_sve_suqadd", [IsStreamingCompatible]>; + +def SVABA_S : SInst<"svaba[_{d}]", "dddd", "csil" , MergeNone, "aarch64_sve_saba", [IsStreamingCompatible]>; +def SVABA_U : SInst<"svaba[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [IsStreamingCompatible]>; +def SVQDMULH : SInst<"svqdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqdmulh", [IsStreamingCompatible]>; +def SVQRDMULH : SInst<"svqrdmulh[_{d}]", "ddd", "csil", MergeNone, "aarch64_sve_sqrdmulh", [IsStreamingCompatible]>; +def SVQRDMLAH : SInst<"svqrdmlah[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlah", [IsStreamingCompatible]>; +def SVQRDMLSH : SInst<"svqrdmlsh[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [IsStreamingCompatible]>; + +def SVABA_S_N : SInst<"svaba[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_saba", [IsStreamingCompatible]>; +def SVABA_U_N : SInst<"svaba[_n_{d}]", "ddda", "UcUsUiUl", MergeNone, "aarch64_sve_uaba", [IsStreamingCompatible]>; +def SVQDMULH_N : SInst<"svqdmulh[_n_{d}]", "dda", "csil", MergeNone, 
"aarch64_sve_sqdmulh", [IsStreamingCompatible]>; +def SVQRDMULH_N : SInst<"svqrdmulh[_n_{d}]", "dda", "csil", MergeNone, "aarch64_sve_sqrdmulh", [IsStreamingCompatible]>; +def SVQRDMLAH_N : SInst<"svqrdmlah[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlah", [IsStreamingCompatible]>; +def SVQRDMLSH_N : SInst<"svqrdmlsh[_n_{d}]", "ddda", "csil", MergeNone, "aarch64_sve_sqrdmlsh", [IsStreamingCompatible]>; + +def SVQDMULH_LANE : SInst<"svqdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqdmulh_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMULH_LANE : SInst<"svqrdmulh_lane[_{d}]", "dddi", "sil", MergeNone, "aarch64_sve_sqrdmulh_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQRDMLAH_LANE : SInst<"svqrdmlah_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlah_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQRDMLSH_LANE : SInst<"svqrdmlsh_lane[_{d}]", "ddddi", "sil", MergeNone, "aarch64_sve_sqrdmlsh_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; + +def SVQSHLU_M : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeOp1, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_X : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeAny, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVQSHLU_Z : SInst<"svqshlu[_n_{d}]", "uPdi", "csil", MergeZero, "aarch64_sve_sqshlu", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVRSHR_M_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_srshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_M_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeOp1, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_X_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_srshr", [IsStreamingCompatible], [ImmCheck<2, 
ImmCheckShiftRight, 1>]>; +def SVRSHR_X_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeAny, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_S : SInst<"svrshr[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_srshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSHR_Z_U : SInst<"svrshr[_n_{d}]", "dPdi", "UcUsUiUl", MergeZero, "aarch64_sve_urshr", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_S : SInst<"svrsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_srsra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVRSRA_U : SInst<"svrsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_ursra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSLI : SInst<"svsli[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sli", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftLeft, 1>]>; +def SVSRA_S : SInst<"svsra[_n_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_ssra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRA_U : SInst<"svsra[_n_{d}]", "dddi", "UcUsUiUl", MergeNone, "aarch64_sve_usra", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVSRI : SInst<"svsri[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_sri", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1581,29 +1363,29 @@ } let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp">; -defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp">; -defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp">; -defm SVMAXP_F : SInstPairwise<"svmaxp", "hfd", "aarch64_sve_fmaxp">; -defm SVMAXP_S : SInstPairwise<"svmaxp", "csli", "aarch64_sve_smaxp">; -defm SVMAXP_U : SInstPairwise<"svmaxp", "UcUsUiUl", 
"aarch64_sve_umaxp">; -defm SVMINNMP : SInstPairwise<"svminnmp", "hfd", "aarch64_sve_fminnmp">; -defm SVMINP_F : SInstPairwise<"svminp", "hfd", "aarch64_sve_fminp">; -defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp">; -defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp">; +defm SVADDP : SInstPairwise<"svaddp", "csliUcUsUiUl", "aarch64_sve_addp", [IsStreamingCompatible]>; +defm SVADDP_F : SInstPairwise<"svaddp", "hfd", "aarch64_sve_faddp", [IsStreamingCompatible]>; +defm SVMAXNMP : SInstPairwise<"svmaxnmp", "hfd", "aarch64_sve_fmaxnmp", [IsStreamingCompatible]>; +defm SVMAXP_F : SInstPairwise<"svmaxp", "hfd", "aarch64_sve_fmaxp", [IsStreamingCompatible]>; +defm SVMAXP_S : SInstPairwise<"svmaxp", "csli", "aarch64_sve_smaxp", [IsStreamingCompatible]>; +defm SVMAXP_U : SInstPairwise<"svmaxp", "UcUsUiUl", "aarch64_sve_umaxp", [IsStreamingCompatible]>; +defm SVMINNMP : SInstPairwise<"svminnmp", "hfd", "aarch64_sve_fminnmp", [IsStreamingCompatible]>; +defm SVMINP_F : SInstPairwise<"svminp", "hfd", "aarch64_sve_fminp", [IsStreamingCompatible]>; +defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp", [IsStreamingCompatible]>; +defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Widening pairwise arithmetic let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp">; -def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp">; -def SVADALP_S_Z : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp">; +def SVADALP_S_M : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeOp1, "aarch64_sve_sadalp", [IsStreamingCompatible]>; +def SVADALP_S_X : SInst<"svadalp[_{d}]", "dPdh", "sil", MergeAny, "aarch64_sve_sadalp", [IsStreamingCompatible]>; +def SVADALP_S_Z : 
SInst<"svadalp[_{d}]", "dPdh", "sil", MergeZero, "aarch64_sve_sadalp", [IsStreamingCompatible]>; -def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp">; -def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp">; -def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp">; +def SVADALP_U_M : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeOp1, "aarch64_sve_uadalp", [IsStreamingCompatible]>; +def SVADALP_U_X : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeAny, "aarch64_sve_uadalp", [IsStreamingCompatible]>; +def SVADALP_U_Z : SInst<"svadalp[_{d}]", "dPdh", "UsUiUl", MergeZero, "aarch64_sve_uadalp", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1611,56 +1393,56 @@ // let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVBCAX : SInst<"svbcax[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; -def SVBSL : SInst<"svbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; -def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; -def SVBSL2N : SInst<"svbsl2n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; -def SVEOR3 : SInst<"sveor3[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; -def SVNBSL : SInst<"svnbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl">; - -def SVBCAX_N : SInst<"svbcax[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax">; -def SVBSL_N : SInst<"svbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl">; -def SVBSL1N_N : SInst<"svbsl1n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n">; -def SVBSL2N_N : SInst<"svbsl2n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n">; -def SVEOR3_N : SInst<"sveor3[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3">; -def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", 
MergeNone, "aarch64_sve_nbsl">; -def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>; +def SVBCAX : SInst<"svbcax[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [IsStreamingCompatible]>; +def SVBSL : SInst<"svbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [IsStreamingCompatible]>; +def SVBSL1N : SInst<"svbsl1n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [IsStreamingCompatible]>; +def SVBSL2N : SInst<"svbsl2n[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n", [IsStreamingCompatible]>; +def SVEOR3 : SInst<"sveor3[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3", [IsStreamingCompatible]>; +def SVNBSL : SInst<"svnbsl[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl", [IsStreamingCompatible]>; + +def SVBCAX_N : SInst<"svbcax[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bcax", [IsStreamingCompatible]>; +def SVBSL_N : SInst<"svbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl", [IsStreamingCompatible]>; +def SVBSL1N_N : SInst<"svbsl1n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl1n", [IsStreamingCompatible]>; +def SVBSL2N_N : SInst<"svbsl2n[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_bsl2n", [IsStreamingCompatible]>; +def SVEOR3_N : SInst<"sveor3[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eor3", [IsStreamingCompatible]>; +def SVNBSL_N : SInst<"svnbsl[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_nbsl", [IsStreamingCompatible]>; +def SVXAR_N : SInst<"svxar[_n_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_xar", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRight, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Large integer arithmetic let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVADCLB : SInst<"svadclb[_{d}]", "dddd", "UiUl", MergeNone, 
"aarch64_sve_adclb">; -def SVADCLT : SInst<"svadclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclt">; -def SVSBCLB : SInst<"svsbclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclb">; -def SVSBCLT : SInst<"svsbclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclt">; +def SVADCLB : SInst<"svadclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclb", [IsStreamingCompatible]>; +def SVADCLT : SInst<"svadclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_adclt", [IsStreamingCompatible]>; +def SVSBCLB : SInst<"svsbclb[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclb", [IsStreamingCompatible]>; +def SVSBCLT : SInst<"svsbclt[_{d}]", "dddd", "UiUl", MergeNone, "aarch64_sve_sbclt", [IsStreamingCompatible]>; -def SVADCLB_N : SInst<"svadclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclb">; -def SVADCLT_N : SInst<"svadclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclt">; -def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclb">; -def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt">; +def SVADCLB_N : SInst<"svadclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclb", [IsStreamingCompatible]>; +def SVADCLT_N : SInst<"svadclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_adclt", [IsStreamingCompatible]>; +def SVSBCLB_N : SInst<"svsbclb[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclb", [IsStreamingCompatible]>; +def SVSBCLT_N : SInst<"svsbclt[_n_{d}]", "ddda", "UiUl", MergeNone, "aarch64_sve_sbclt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Multiplication by indexed elements let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVMLA_LANE_2 : SInst<"svmla_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mls_lane", [], 
[ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMLA_LANE_2 : SInst<"svmla_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mla_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLS_LANE_2 : SInst<"svmls_lane[_{d}]", "ddddi", "silUsUiUl", MergeNone, "aarch64_sve_mls_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMUL_LANE_2 : SInst<"svmul_lane[_{d}]", "dddi", "silUsUiUl", MergeNone, "aarch64_sve_mul_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Uniform complex integer arithmetic let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; -def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [], [ImmCheck<2, ImmCheckComplexRot90_270>]>; -def SVCMLA : SInst<"svcmla[_{d}]", "ddddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, "aarch64_sve_cmla_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVCADD : SInst<"svcadd[_{d}]", "dddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cadd_x", [IsStreamingCompatible], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVSQCADD : SInst<"svqcadd[_{d}]", "dddi", "csil", MergeNone, "aarch64_sve_sqcadd_x", [IsStreamingCompatible], [ImmCheck<2, ImmCheckComplexRot90_270>]>; +def SVCMLA : SInst<"svcmla[_{d}]", "ddddi", "csilUcUsUiUl", MergeNone, "aarch64_sve_cmla_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVCMLA_LANE_X : SInst<"svcmla_lane[_{d}]", "ddddii", "siUsUi", MergeNone, 
"aarch64_sve_cmla_lane_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; -def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "ddddi", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, +def SVSQRDCMLAH_X : SInst<"svqrdcmlah[_{d}]", "ddddi", "csil", MergeNone, "aarch64_sve_sqrdcmlah_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVSQRDCMLAH_LANE_X : SInst<"svqrdcmlah_lane[_{d}]", "ddddii", "si", MergeNone, "aarch64_sve_sqrdcmlah_lane_x", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>, ImmCheck<4, ImmCheckComplexRotAll90>]>; } @@ -1668,18 +1450,18 @@ // SVE2 - Widening DSP operations multiclass SInstWideDSPAcc { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } multiclass SInstWideDSPLong { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } multiclass SInstWideDSPWide { - def : SInst; - def _N : SInst; + def : SInst; + def _N : SInst; } let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { @@ -1728,87 +1510,87 @@ defm SVSUBWT_S : SInstWideDSPWide<"svsubwt", "sil", "aarch64_sve_ssubwt">; defm SVSUBWT_U : SInstWideDSPWide<"svsubwt", "UsUiUl", "aarch64_sve_usubwt">; -def SVSHLLB_S_N : SInst<"svshllb[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllb", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllb", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; -def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllt", [], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; - -def SVMOVLB_S_N : 
SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone>; -def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone>; -def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone>; -def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone>; - -def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_S_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_U_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_S_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_U_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_S_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_U_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMULLB_S_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLB_U_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLT_S_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVMULLT_U_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 
1>]>; -def SVQDMLALB_LANE : SInst<"svqdmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLALT_LANE : SInst<"svqdmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLSLB_LANE : SInst<"svqdmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMLSLT_LANE : SInst<"svqdmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVQDMULLB_LANE : SInst<"svqdmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullb_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; -def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullt_lane", [], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVSHLLB_S_N : SInst<"svshllb[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLB_U_N : SInst<"svshllb[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLT_S_N : SInst<"svshllt[_n_{d}]", "dhi", "sil", MergeNone, "aarch64_sve_sshllt", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; +def SVSHLLT_U_N : SInst<"svshllt[_n_{d}]", "dhi", "UsUiUl", MergeNone, "aarch64_sve_ushllt", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftLeft, 0>]>; + +def SVMOVLB_S_N : SInst<"svmovlb[_{d}]", "dh", "sil", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLB_U_N : SInst<"svmovlb[_{d}]", "dh", "UsUiUl", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLT_S_N : SInst<"svmovlt[_{d}]", "dh", "sil", MergeNone, "", [IsStreamingCompatible]>; +def SVMOVLT_U_N : SInst<"svmovlt[_{d}]", "dh", "UsUiUl", MergeNone, "", [IsStreamingCompatible]>; + +def SVMLALB_S_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "il", MergeNone, 
"aarch64_sve_smlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALB_U_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_S_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_U_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_S_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_U_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_S_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_smlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLT_U_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "UiUl", MergeNone, "aarch64_sve_umlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMULLB_S_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLB_U_LANE : SInst<"svmullb_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLT_S_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_smullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVMULLT_U_LANE : SInst<"svmullt_lane[_{d}]", "dhhi", "UiUl", MergeNone, "aarch64_sve_umullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQDMLALB_LANE : SInst<"svqdmlalb_lane[_{d}]", "ddhhi", "il", 
MergeNone, "aarch64_sve_sqdmlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLALT_LANE : SInst<"svqdmlalt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLSLB_LANE : SInst<"svqdmlslb_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMLSLT_LANE : SInst<"svqdmlslt_lane[_{d}]", "ddhhi", "il", MergeNone, "aarch64_sve_sqdmlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVQDMULLB_LANE : SInst<"svqdmullb_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullb_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; +def SVQDMULLT_LANE : SInst<"svqdmullt_lane[_{d}]", "dhhi", "il", MergeNone, "aarch64_sve_sqdmullt_lane", [IsStreamingCompatible], [ImmCheck<2, ImmCheckLaneIndex, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Narrowing DSP operations let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb">; -def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt">; -def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb">; -def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt">; -def SVRSUBHNB : SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb">; -def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt">; -def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb">; -def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt">; - -def SVADDHNB_N : SInst<"svaddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_addhnb">; -def 
SVADDHNT_N : SInst<"svaddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_addhnt">; -def SVRADDHNB_N : SInst<"svraddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb">; -def SVRADDHNT_N : SInst<"svraddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt">; -def SVRSUBHNB_N : SInst<"svrsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb">; -def SVRSUBHNT_N : SInst<"svrsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt">; -def SVSUBHNB_N : SInst<"svsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_subhnb">; -def SVSUBHNT_N : SInst<"svsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_subhnt">; - -def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqshrunb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRNB_S : SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; -def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; - -def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def 
SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; -def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVADDHNB : SInst<"svaddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [IsStreamingCompatible]>; +def SVADDHNT : SInst<"svaddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [IsStreamingCompatible]>; +def SVRADDHNB : SInst<"svraddhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [IsStreamingCompatible]>; +def SVRADDHNT : SInst<"svraddhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [IsStreamingCompatible]>; +def SVRSUBHNB : SInst<"svrsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [IsStreamingCompatible]>; +def SVRSUBHNT : SInst<"svrsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [IsStreamingCompatible]>; +def SVSUBHNB : SInst<"svsubhnb[_{d}]", "hdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [IsStreamingCompatible]>; +def SVSUBHNT : SInst<"svsubhnt[_{d}]", "hhdd", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [IsStreamingCompatible]>; + +def SVADDHNB_N : 
SInst<"svaddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_addhnb", [IsStreamingCompatible]>; +def SVADDHNT_N : SInst<"svaddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_addhnt", [IsStreamingCompatible]>; +def SVRADDHNB_N : SInst<"svraddhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnb", [IsStreamingCompatible]>; +def SVRADDHNT_N : SInst<"svraddhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_raddhnt", [IsStreamingCompatible]>; +def SVRSUBHNB_N : SInst<"svrsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnb", [IsStreamingCompatible]>; +def SVRSUBHNT_N : SInst<"svrsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_rsubhnt", [IsStreamingCompatible]>; +def SVSUBHNB_N : SInst<"svsubhnb[_n_{d}]", "hda", "silUsUiUl", MergeNone, "aarch64_sve_subhnb", [IsStreamingCompatible]>; +def SVSUBHNT_N : SInst<"svsubhnt[_n_{d}]", "hhda", "silUsUiUl", MergeNone, "aarch64_sve_subhnt", [IsStreamingCompatible]>; + +def SVSHRNB : SInst<"svshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVRSHRNB : SInst<"svrshrnb[_n_{d}]", "hdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRUNB : SInst<"svqshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqshrunb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRUNB : SInst<"svqrshrunb[_n_{d}]", "edi", "sil", MergeNone, "aarch64_sve_sqrshrunb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_S : SInst<"svqshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQSHRNB_U : SInst<"svqshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_S : 
SInst<"svqrshrnb[_n_{d}]", "hdi", "sil", MergeNone, "aarch64_sve_sqrshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; +def SVQRSHRNB_U : SInst<"svqrshrnb[_n_{d}]", "hdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnb", [IsStreamingCompatible], [ImmCheck<1, ImmCheckShiftRightNarrow, 0>]>; + +def SVSHRNT : SInst<"svshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_shrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVRSHRNT : SInst<"svrshrnt[_n_{d}]", "hhdi", "silUsUiUl", MergeNone, "aarch64_sve_rshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRUNT : SInst<"svqshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqshrunt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRUNT : SInst<"svqrshrunt[_n_{d}]", "eedi", "sil", MergeNone, "aarch64_sve_sqrshrunt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_S : SInst<"svqshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQSHRNT_U : SInst<"svqshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_S : SInst<"svqrshrnt[_n_{d}]", "hhdi", "sil", MergeNone, "aarch64_sve_sqrshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; +def SVQRSHRNT_U : SInst<"svqrshrnt[_n_{d}]", "hhdi", "UsUiUl", MergeNone, "aarch64_sve_uqrshrnt", [IsStreamingCompatible], [ImmCheck<2, ImmCheckShiftRightNarrow, 1>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Unary narrowing operations let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb">; -def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb">; -def 
SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb">; +def SVQXTNB_S : SInst<"svqxtnb[_{d}]", "hd", "sil", MergeNone, "aarch64_sve_sqxtnb", [IsStreamingCompatible]>; +def SVQXTNB_U : SInst<"svqxtnb[_{d}]", "hd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnb", [IsStreamingCompatible]>; +def SVQXTUNB_S : SInst<"svqxtunb[_{d}]", "ed", "sil", MergeNone, "aarch64_sve_sqxtunb", [IsStreamingCompatible]>; -def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt">; -def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt">; -def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt">; +def SVQXTNT_S : SInst<"svqxtnt[_{d}]", "hhd", "sil", MergeNone, "aarch64_sve_sqxtnt", [IsStreamingCompatible]>; +def SVQXTNT_U : SInst<"svqxtnt[_{d}]", "hhd", "UsUiUl", MergeNone, "aarch64_sve_uqxtnt", [IsStreamingCompatible]>; +def SVQXTUNT_S : SInst<"svqxtunt[_{d}]", "eed", "sil", MergeNone, "aarch64_sve_sqxtunt", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -1949,18 +1731,19 @@ // SVE2 - Polynomial arithmetic let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVEORBT : SInst<"sveorbt[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; -def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt">; -def SVEORTB : SInst<"sveortb[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; -def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb">; -def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul">; -def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul">; -def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone>; -def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone>; -def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, 
"aarch64_sve_pmullb_pair">; -def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair">; -def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone>; -def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone>; +def SVEORBT : SInst<"sveorbt[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [IsStreamingCompatible]>; +def SVEORBT_N : SInst<"sveorbt[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eorbt", [IsStreamingCompatible]>; +def SVEORTB : SInst<"sveortb[_{d}]", "dddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [IsStreamingCompatible]>; +def SVEORTB_N : SInst<"sveortb[_n_{d}]", "ddda", "csilUcUsUiUl", MergeNone, "aarch64_sve_eortb", [IsStreamingCompatible]>; +def SVPMUL : SInst<"svpmul[_{d}]", "ddd", "Uc", MergeNone, "aarch64_sve_pmul", [IsStreamingCompatible]>; +def SVPMUL_N : SInst<"svpmul[_n_{d}]", "dda", "Uc", MergeNone, "aarch64_sve_pmul", [IsStreamingCompatible]>; +def SVPMULLB : SInst<"svpmullb[_{d}]", "dhh", "UsUl", MergeNone, "", [IsStreamingCompatible]>; +def SVPMULLB_N : SInst<"svpmullb[_n_{d}]", "dhR", "UsUl", MergeNone, "", [IsStreamingCompatible]>; +// TODO: Are these AES instructions?! 
+def SVPMULLB_PAIR : SInst<"svpmullb_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [IsStreamingCompatible]>; +def SVPMULLB_PAIR_N : SInst<"svpmullb_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullb_pair", [IsStreamingCompatible]>; +def SVPMULLT : SInst<"svpmullt[_{d}]", "dhh", "UsUl", MergeNone, "", [IsStreamingCompatible]>; +def SVPMULLT_N : SInst<"svpmullt[_n_{d}]", "dhR", "UsUl", MergeNone, "", [IsStreamingCompatible]>; def SVPMULLT_PAIR : SInst<"svpmullt_pair[_{d}]", "ddd", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">; def SVPMULLT_PAIR_N : SInst<"svpmullt_pair[_n_{d}]", "dda", "UcUi", MergeNone, "aarch64_sve_pmullt_pair">; } @@ -1969,8 +1752,8 @@ // SVE2 - Complex integer dot product let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [], [ImmCheck<3, ImmCheckComplexRotAll90>]>; -def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [], [ImmCheck<4, ImmCheckComplexRotAll90>, +def SVCDOT : SInst<"svcdot[_{d}]", "ddqqi", "il", MergeNone, "aarch64_sve_cdot", [IsStreamingCompatible], [ImmCheck<3, ImmCheckComplexRotAll90>]>; +def SVCDOT_LANE : SInst<"svcdot_lane[_{d}]", "ddqqii", "il", MergeNone, "aarch64_sve_cdot_lane", [IsStreamingCompatible], [ImmCheck<4, ImmCheckComplexRotAll90>, ImmCheck<3, ImmCheckLaneIndexDot, 2>]>; } @@ -1978,27 +1761,27 @@ // SVE2 - Floating-point widening multiply-accumulate let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb">; -def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb">; -def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt">; -def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", 
MergeNone, "aarch64_sve_fmlalt">; -def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb">; -def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb">; -def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; -def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt">; -def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt">; -def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALB_F : SInst<"svmlalb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalb", [IsStreamingCompatible]>; +def SVMLALB_F_N : SInst<"svmlalb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalb", [IsStreamingCompatible]>; +def SVMLALB_F_LANE : SInst<"svmlalb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLALT_F : SInst<"svmlalt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlalt", [IsStreamingCompatible]>; +def SVMLALT_F_N : SInst<"svmlalt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlalt", [IsStreamingCompatible]>; +def SVMLALT_F_LANE : SInst<"svmlalt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlalt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; +def SVMLSLB_F : SInst<"svmlslb[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslb", [IsStreamingCompatible]>; +def SVMLSLB_F_N : SInst<"svmlslb[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslb", [IsStreamingCompatible]>; +def SVMLSLB_F_LANE : SInst<"svmlslb_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslb_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 
2>]>; +def SVMLSLT_F : SInst<"svmlslt[_{d}]", "ddhh", "f", MergeNone, "aarch64_sve_fmlslt", [IsStreamingCompatible]>; +def SVMLSLT_F_N : SInst<"svmlslt[_n_{d}]", "ddhR", "f", MergeNone, "aarch64_sve_fmlslt", [IsStreamingCompatible]>; +def SVMLSLT_F_LANE : SInst<"svmlslt_lane[_{d}]", "ddhhi", "f", MergeNone, "aarch64_sve_fmlslt_lane", [IsStreamingCompatible], [ImmCheck<3, ImmCheckLaneIndex, 2>]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Floating-point integer binary logarithm let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb">; -def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb">; -def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb">; +def SVLOGB_M : SInst<"svlogb[_{d}]", "xxPd", "hfd", MergeOp1, "aarch64_sve_flogb", [IsStreamingCompatible]>; +def SVLOGB_X : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeAnyExp, "aarch64_sve_flogb", [IsStreamingCompatible]>; +def SVLOGB_Z : SInst<"svlogb[_{d}]", "xPd", "hfd", MergeZeroExp, "aarch64_sve_flogb", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -2020,32 +1803,32 @@ //////////////////////////////////////////////////////////////////////////////// // SVE2 - Contiguous conflict detection let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilerw_b", [IsOverloadWhileRW]>; -def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>; -def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW]>; -def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW]>; +def SVWHILERW_B : SInst<"svwhilerw[_{1}]", "Pcc", "cUc", MergeNone, 
"aarch64_sve_whilerw_b", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILERW_H : SInst<"svwhilerw[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILERW_S : SInst<"svwhilerw[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilerw_s", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILERW_D : SInst<"svwhilerw[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilerw_d", [IsOverloadWhileRW, IsStreamingCompatible]>; -def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW]>; -def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; -def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW]>; -def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>; +def SVWHILEWR_B : SInst<"svwhilewr[_{1}]", "Pcc", "cUc", MergeNone, "aarch64_sve_whilewr_b", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILEWR_H : SInst<"svwhilewr[_{1}]", "Pcc", "sUsh", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILEWR_S : SInst<"svwhilewr[_{1}]", "Pcc", "iUif", MergeNone, "aarch64_sve_whilewr_s", [IsOverloadWhileRW, IsStreamingCompatible]>; +def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW, IsStreamingCompatible]>; } let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in { -def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>; -def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>; +def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW, IsStreamingCompatible]>; 
+def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW, IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// // SVE2 - Extended table lookup/permute let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in { -def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone>; -def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">; +def SVTBL2 : SInst<"svtbl2[_{d}]", "d2u", "csilUcUsUiUlhfd", MergeNone, "", [IsStreamingCompatible]>; +def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx", [IsStreamingCompatible]>; } let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in { -def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone>; -def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx">; +def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone, "", [IsStreamingCompatible]>; +def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx", [IsStreamingCompatible]>; } //////////////////////////////////////////////////////////////////////////////// @@ -2081,3 +1864,97 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; } + +//////////////////////////////////////////////////////////////////////////////// +// SME + +let ArchGuard = "defined(__ARM_FEATURE_SME)" in { +def SVSCLAMP : SInst<"svsclamp[_{d}]", "dddd", "csil", MergeNone, "aarch64_sve_sclamp">; +def SVUCLAMP : SInst<"svuclamp[_{d}]", "dddd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp">; + +defm SVREVD : SInstZPZ<"svrevd", "csilUcUsUiUl", "aarch64_sve_revd">; + +def SVPSEL : SInst<"svpsel[_{d}]", "PPPk", "PcPsPiPl", MergeNone, "aarch64_sve_psel">; +} + + +let ArchGuard = "defined(__ARM_FEATURE_SME2)" in { +def 
SVPTRUE_COUNT_C8 : SInst<"svptrue_{d}", "}v", "Qc", MergeNone, "aarch64_sve_ptrue_c8", [IsStreaming, IsOverloadNone], []>; +def SVPTRUE_COUNT_C16 : SInst<"svptrue_{d}", "}v", "Qs", MergeNone, "aarch64_sve_ptrue_c16", [IsStreaming, IsOverloadNone], []>; +def SVPTRUE_COUNT_C32 : SInst<"svptrue_{d}", "}v", "Qi", MergeNone, "aarch64_sve_ptrue_c32", [IsStreaming, IsOverloadNone], []>; +def SVPTRUE_COUNT_C64 : SInst<"svptrue_{d}", "}v", "Ql", MergeNone, "aarch64_sve_ptrue_c64", [IsStreaming, IsOverloadNone], []>; + +def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreaming], [ImmCheck<1, ImmCheck0_3>]>; + +def SVLD1B_VG2 : MInst<"svld1b[_{2}]_x2", "2}c", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1H_VG2 : MInst<"svld1h[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1W_VG2 : MInst<"svld1w[_{2}]_x2", "2}c", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1D_VG2 : MInst<"svld1d[_{2}]_x2", "2}c", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1B_VG4 : MInst<"svld1b[_{2}]_x4", "4}c", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1H_VG4 : MInst<"svld1h[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1W_VG4 : MInst<"svld1w[_{2}]_x4", "4}c", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1D_VG4 : MInst<"svld1d[_{2}]_x4", "4}c", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; + +def SVLDNT1B_VG2 : MInst<"svldnt1b[_{2}]_x2", "2}c", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1H_VG2 : MInst<"svldnt1h[_{2}]_x2", "2}c", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1W_VG2 
: MInst<"svldnt1w[_{2}]_x2", "2}c", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1D_VG2 : MInst<"svldnt1d[_{2}]_x2", "2}c", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1B_VG4 : MInst<"svldnt1b[_{2}]_x4", "4}c", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1H_VG4 : MInst<"svldnt1h[_{2}]_x4", "4}c", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1W_VG4 : MInst<"svldnt1w[_{2}]_x4", "4}c", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1D_VG4 : MInst<"svldnt1d[_{2}]_x4", "4}c", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; + +def SVLD1B_VNUM_VG2 : MInst<"svld1b_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1H_VNUM_VG2 : MInst<"svld1h_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1W_VNUM_VG2 : MInst<"svld1w_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1D_VNUM_VG2 : MInst<"svld1d_vnum[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg2">; +def SVLD1B_VNUM_VG4 : MInst<"svld1b_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1H_VNUM_VG4 : MInst<"svld1h_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1W_VNUM_VG4 : MInst<"svld1w_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; +def SVLD1D_VNUM_VG4 : MInst<"svld1d_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ld1_pn_vg4">; + +def SVLDNT1B_VNUM_VG2 : 
MInst<"svldnt1b_vnum[_{2}]_x2", "2}cl", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1H_VNUM_VG2 : MInst<"svldnt1h_vnum[_{2}]_x2", "2}cl", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1W_VNUM_VG2 : MInst<"svldnt1w_vnum[_{2}]_x2", "2}cl", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1D_VNUM_VG2 : MInst<"svldnt1d_vnum[_{2}]_x2", "2}cl", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg2">; +def SVLDNT1B_VNUM_VG4 : MInst<"svldnt1b_vnum[_{2}]_x4", "4}cl", "cUc", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1H_VNUM_VG4 : MInst<"svldnt1h_vnum[_{2}]_x4", "4}cl", "sUshb", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1W_VNUM_VG4 : MInst<"svldnt1w_vnum[_{2}]_x4", "4}cl", "iUif", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; +def SVLDNT1D_VNUM_VG4 : MInst<"svldnt1d_vnum[_{2}]_x4", "4}cl", "lUld", [IsStructLoad, IsStreaming], MemEltTyDefault, "aarch64_sve_ldnt1_pn_vg4">; + +def SVST1B_VG2 : MInst<"svst1b[_{2}]_x2", "v}p2", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1H_VG2 : MInst<"svst1h[_{2}]_x2", "v}p2", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1W_VG2 : MInst<"svst1w[_{2}]_x2", "v}p2", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1D_VG2 : MInst<"svst1d[_{2}]_x2", "v}p2", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1B_VG4 : MInst<"svst1b[_{2}]_x4", "v}p4", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1H_VG4 : MInst<"svst1h[_{2}]_x4", "v}p4", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1W_VG4 : 
MInst<"svst1w[_{2}]_x4", "v}p4", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1D_VG4 : MInst<"svst1d[_{2}]_x4", "v}p4", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; + +def SVST1B_VNUM_VG2 : MInst<"svst1b_vnum[_{2}]_x2", "v}pl2", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1H_VNUM_VG2 : MInst<"svst1h_vnum[_{2}]_x2", "v}pl2", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1W_VNUM_VG2 : MInst<"svst1w_vnum[_{2}]_x2", "v}pl2", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1D_VNUM_VG2 : MInst<"svst1d_vnum[_{2}]_x2", "v}pl2", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg2">; +def SVST1B_VNUM_VG4 : MInst<"svst1b_vnum[_{2}]_x4", "v}pl4", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1H_VNUM_VG4 : MInst<"svst1h_vnum[_{2}]_x4", "v}pl4", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1W_VNUM_VG4 : MInst<"svst1w_vnum[_{2}]_x4", "v}pl4", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; +def SVST1D_VNUM_VG4 : MInst<"svst1d_vnum[_{2}]_x4", "v}pl4", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_st1_pn_vg4">; + +def SVSTNT1B_VG2 : MInst<"svstnt1b[_{2}]_x2", "v}p2", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1H_VG2 : MInst<"svstnt1h[_{2}]_x2", "v}p2", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1W_VG2 : MInst<"svstnt1w[_{2}]_x2", "v}p2", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1D_VG2 : MInst<"svstnt1d[_{2}]_x2", "v}p2", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1B_VG4 : 
MInst<"svstnt1b[_{2}]_x4", "v}p4", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1H_VG4 : MInst<"svstnt1h[_{2}]_x4", "v}p4", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1W_VG4 : MInst<"svstnt1w[_{2}]_x4", "v}p4", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1D_VG4 : MInst<"svstnt1d[_{2}]_x4", "v}p4", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; + +def SVSTNT1B_VNUM_VG2 : MInst<"svstnt1b_vnum[_{2}]_x2", "v}pl2", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1H_VNUM_VG2 : MInst<"svstnt1h_vnum[_{2}]_x2", "v}pl2", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1W_VNUM_VG2 : MInst<"svstnt1w_vnum[_{2}]_x2", "v}pl2", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1D_VNUM_VG2 : MInst<"svstnt1d_vnum[_{2}]_x2", "v}pl2", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg2">; +def SVSTNT1B_VNUM_VG4 : MInst<"svstnt1b_vnum[_{2}]_x4", "v}pl4", "cUc", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1H_VNUM_VG4 : MInst<"svstnt1h_vnum[_{2}]_x4", "v}pl4", "sUshb", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1W_VNUM_VG4 : MInst<"svstnt1w_vnum[_{2}]_x4", "v}pl4", "iUif", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +def SVSTNT1D_VNUM_VG4 : MInst<"svstnt1d_vnum[_{2}]_x4", "v}pl4", "lUld", [IsStructStore, IsStreaming], MemEltTyDefault, "aarch64_sve_stnt1_pn_vg4">; +} diff --git a/clang/include/clang/Basic/arm_sve_common.td b/clang/include/clang/Basic/arm_sve_common.td new file mode 100644 --- /dev/null +++ b/clang/include/clang/Basic/arm_sve_common.td @@ -0,0 +1,246 @@ 
+//===----------------------------------------------------------------------===// +// Instruction definitions +//===----------------------------------------------------------------------===// +// Every intrinsic subclasses "Inst". An intrinsic has a name, a prototype and +// a sequence of typespecs. +// +// The name is the base name of the intrinsic, for example "svld1". This is +// then mangled by the tblgen backend to add type information ("svld1_s16"). +// +// A typespec is a sequence of uppercase characters (modifiers) followed by one +// lowercase character. A typespec encodes a particular "base type" of the +// intrinsic. +// +// An example typespec is "Us" - unsigned short - svuint16_t. The available +// typespec codes are given below. +// +// The string given to an Inst class is a sequence of typespecs. The intrinsic +// is instantiated for every typespec in the sequence. For example "sdUsUd". +// +// The prototype is a string that defines the return type of the intrinsic +// and the type of each argument. The return type and every argument gets a +// "modifier" that can change in some way the "base type" of the intrinsic. +// +// The modifier 'd' means "default" and does not modify the base type in any +// way. The available modifiers are given below. +// +// Typespecs +// --------- +// c: char +// s: short +// i: int +// l: long +// f: float +// h: half-float +// d: double +// b: bfloat + +// Typespec modifiers +// ------------------ +// P: boolean +// Q: svcount +// U: unsigned + +// Prototype modifiers +// ------------------- +// prototype: return (arg, arg, ...) 
+// +// 2,3,4: array of default vectors +// v: void +// x: vector of signed integers +// u: vector of unsigned integers +// d: default +// c: const pointer type +// P: predicate type +// s: scalar of element type +// a: scalar of element type (splat to vector type) +// R: scalar of 1/2 width element type (splat to vector type) +// r: scalar of 1/4 width element type (splat to vector type) +// @: unsigned scalar of 1/4 width element type (splat to vector type) +// e: 1/2 width unsigned elements, 2x element count +// b: 1/4 width unsigned elements, 4x element count +// h: 1/2 width elements, 2x element count +// q: 1/4 width elements, 4x element count +// o: 4x width elements, 1/4 element count +// +// w: vector of element type promoted to 64bits, vector maintains +// signedness of its element type. +// f: element type promoted to uint64_t (splat to vector type) +// j: element type promoted to 64bits (splat to vector type) +// K: element type bitcast to a signed integer (splat to vector type) +// L: element type bitcast to an unsigned integer (splat to vector type) +// +// i: constant uint64_t +// k: int32_t +// l: int64_t +// m: uint32_t +// n: uint64_t +// y: bool + +// t: svint32_t +// z: svuint32_t +// g: svuint64_t +// O: svfloat16_t +// M: svfloat32_t +// N: svfloat64_t + +// J: Prefetch type (sv_prfop) +// p: pointer to element type +// A: pointer to int8_t +// B: pointer to int16_t +// C: pointer to int32_t +// D: pointer to int64_t + +// E: pointer to uint8_t +// F: pointer to uint16_t +// G: pointer to uint32_t +// H: pointer to uint64_t + +// Q: const pointer to void + +// S: const pointer to int8_t +// T: const pointer to int16_t +// U: const pointer to int32_t +// V: const pointer to int64_t +// +// W: const pointer to uint8_t +// X: const pointer to uint16_t +// Y: const pointer to uint32_t +// Z: const pointer to uint64_t + +// Prototype modifiers added for SME +// {: pointer to void +// +// Prototype modifiers added for SVE2p1 +// }: svcount_t + 
+class MergeType<int val, string suffix=""> { // suffix is optional: MergeNone passes only a value + int Value = val; + string Suffix = suffix; +} +def MergeNone : MergeType<0>; +def MergeAny : MergeType<1, "_x">; +def MergeOp1 : MergeType<2, "_m">; +def MergeZero : MergeType<3, "_z">; +def MergeAnyExp : MergeType<4, "_x">; // Use merged builtin with explicit +def MergeZeroExp : MergeType<5, "_z">; // generation of its inactive argument. + +class EltType<int val> { // numeric id of an element type + int Value = val; +} +def EltTyInvalid : EltType<0>; +def EltTyInt8 : EltType<1>; +def EltTyInt16 : EltType<2>; +def EltTyInt32 : EltType<3>; +def EltTyInt64 : EltType<4>; +def EltTyFloat16 : EltType<5>; +def EltTyFloat32 : EltType<6>; +def EltTyFloat64 : EltType<7>; +def EltTyBool8 : EltType<8>; +def EltTyBool16 : EltType<9>; +def EltTyBool32 : EltType<10>; +def EltTyBool64 : EltType<11>; +def EltTyBFloat16 : EltType<12>; + +class MemEltType<int val> { // numeric id of a memory element type + int Value = val; +} +def MemEltTyDefault : MemEltType<0>; +def MemEltTyInt8 : MemEltType<1>; +def MemEltTyInt16 : MemEltType<2>; +def MemEltTyInt32 : MemEltType<3>; +def MemEltTyInt64 : MemEltType<4>; +def MemEltTyInt128 : MemEltType<5>; + +class FlagType<int val> { // one flag bit or bit mask + int Value = val; +} + +// These must be kept in sync with the flags in utils/TableGen/SveEmitter.cpp +// and include/clang/Basic/TargetBuiltins.h +def NoFlags : FlagType<0x00000000>; +def FirstEltType : FlagType<0x00000001>; +// : : +// : : +def EltTypeMask : FlagType<0x0000000f>; +def FirstMemEltType : FlagType<0x00000010>; +// : : +// : : +def MemEltTypeMask : FlagType<0x00000070>; +def FirstMergeTypeMask : FlagType<0x00000080>; +// : : +// : : +def MergeTypeMask : FlagType<0x00000380>; +def FirstSplatOperand : FlagType<0x00000400>; +// : : +// These flags are used to specify which scalar operand +// needs to be duplicated/splatted into a vector. 
+// : : +def SplatOperandMask : FlagType<0x00001C00>; +def IsLoad : FlagType<0x00002000>; +def IsStore : FlagType<0x00004000>; +def IsGatherLoad : FlagType<0x00008000>; +def IsScatterStore : FlagType<0x00010000>; +def IsStructLoad : FlagType<0x00020000>; +def IsStructStore : FlagType<0x00040000>; +def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extended by default +def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. +def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. +def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. +def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. +def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. +def IsByteIndexed : FlagType<0x01000000>; +def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. +def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. +def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. +def IsGatherPrefetch : FlagType<0x10000000>; +def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. +def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. +def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. 
+def IsTupleCreate : FlagType<0x100000000>; +def IsTupleGet : FlagType<0x200000000>; +def IsTupleSet : FlagType<0x400000000>; +def IsStreaming : FlagType<0x800000000>; +def IsStreamingCompatible : FlagType<0x1000000000>; +def IsSharedZA : FlagType<0x2000000000>; +def IsPreservedZA : FlagType<0x4000000000>; +def IsReadZA : FlagType<0x8000000000>; +def IsWriteZA : FlagType<0x10000000000>; +def IsArmInStreamingMode : FlagType<0x20000000000>; +def IsZASliceBaseOffsetIntr : FlagType<0x40000000000>; + +// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h +class ImmCheckType<int val> { // numeric id of an immediate-range check + int Value = val; +} +def ImmCheck0_31 : ImmCheckType<0>; // 0..31 (used for e.g. predicate patterns) +def ImmCheck1_16 : ImmCheckType<1>; // 1..16 +def ImmCheckExtract : ImmCheckType<2>; // 0..(2048/sizeinbits(elt) - 1) +def ImmCheckShiftRight : ImmCheckType<3>; // 1..sizeinbits(elt) +def ImmCheckShiftRightNarrow : ImmCheckType<4>; // 1..sizeinbits(elt)/2 +def ImmCheckShiftLeft : ImmCheckType<5>; // 0..(sizeinbits(elt) - 1) +def ImmCheck0_7 : ImmCheckType<6>; // 0..7 +def ImmCheckLaneIndex : ImmCheckType<7>; // 0..(128/(1*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexCompRotate : ImmCheckType<8>; // 0..(128/(2*sizeinbits(elt)) - 1) +def ImmCheckLaneIndexDot : ImmCheckType<9>; // 0..(128/(4*sizeinbits(elt)) - 1) +def ImmCheckComplexRot90_270 : ImmCheckType<10>; // [90,270] +def ImmCheckComplexRotAll90 : ImmCheckType<11>; // [0, 90, 180,270] +def ImmCheck0_13 : ImmCheckType<12>; // 0..13 +def ImmCheck0_1 : ImmCheckType<13>; // 0..1 +def ImmCheck0_2 : ImmCheckType<14>; // 0..2 +def ImmCheck0_3 : ImmCheckType<15>; // 0..3 +def ImmCheck0 : ImmCheckType<16>; // Must be 0. 
+def ImmCheck0_15 : ImmCheckType<17>; // 0..15 +def ImmCheck0_255 : ImmCheckType<18>; // 0..255 +def ImmCheck0_2_Mul2 : ImmCheckType<19>; // 0, 2 +def ImmCheck0_6_Mul2 : ImmCheckType<20>; // 0, 2, .., 6 +def ImmCheck0_14_Mul2 : ImmCheckType<21>; // 0, 2, .., 14 +def ImmCheck0_4_Mul4 : ImmCheckType<22>; // 0, 4 +def ImmCheck0_12_Mul4 : ImmCheckType<23>; // 0, 4, 8, 12 + + +class ImmCheck { + int Arg = arg; + int EltSizeArg = eltSizeArg; + ImmCheckType Kind = kind; +} diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -6940,6 +6940,8 @@ NestedNameSpecInfo &IdInfo, bool EnteringContext); + bool IsInvalidSMECallConversion(QualType FromType, QualType ToType); + /// The parser has parsed a nested-name-specifier /// 'template[opt] template-name < template-args >::'. /// @@ -13261,7 +13263,10 @@ bool CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); bool CheckMVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool ParseSVEImmChecks(CallExpr *TheCall, + SmallVector, 3> &ImmChecks); bool CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); + bool CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall); bool CheckCDEBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall); bool CheckARMCoprocessorImmediate(const TargetInfo &TI, const Expr *CoprocArg, diff --git a/clang/include/clang/module.modulemap b/clang/include/clang/module.modulemap --- a/clang/include/clang/module.modulemap +++ b/clang/include/clang/module.modulemap @@ -49,6 +49,7 @@ textual header "Basic/BuiltinsRISCV.def" textual header "Basic/BuiltinsRISCVVector.def" textual header "Basic/BuiltinsSVE.def" + textual header "Basic/BuiltinsSME.def" textual header "Basic/BuiltinsSystemZ.def" textual header "Basic/BuiltinsVE.def" textual header "Basic/BuiltinsVEVL.gen.def" diff --git a/clang/lib/AST/ASTContext.cpp 
b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2265,6 +2265,11 @@ Width = 0; \ Align = 16; \ break; +#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingletonId) \ + case BuiltinType::Id: \ + Width = 0; \ + Align = 16; \ + break; #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ @@ -3963,6 +3968,10 @@ return SVE_INT_ELTTY(64, 2, false, 4); case BuiltinType::SveBool: return SVE_ELTTY(BoolTy, 16, 1); + case BuiltinType::SveBoolx2: + return SVE_ELTTY(BoolTy, 16, 2); + case BuiltinType::SveBoolx4: + return SVE_ELTTY(BoolTy, 16, 4); case BuiltinType::SveFloat16: return SVE_ELTTY(HalfTy, 8, 1); case BuiltinType::SveFloat16x2: @@ -4033,6 +4042,7 @@ #define SVE_PREDICATE_TYPE(Name, MangledName, Id, SingletonId, NumEls) \ if (EltTy->isBooleanType() && NumElts == NumEls) \ return SingletonId; +#define SVE_OPAQUE_TYPE(Name, MangledName, Id, SingleTonId) #include "clang/Basic/AArch64SVEACLETypes.def" } else if (Target->hasRISCVVTypes()) { uint64_t EltTySize = getTypeSize(EltTy); @@ -9438,9 +9448,10 @@ /// getSVETypeSize - Return SVE vector or predicate register size. static uint64_t getSVETypeSize(ASTContext &Context, const BuiltinType *Ty) { assert(Ty->isVLSTBuiltinType() && "Invalid SVE Type"); - return Ty->getKind() == BuiltinType::SveBool - ? 
(Context.getLangOpts().VScaleMin * 128) / Context.getCharWidth() - : Context.getLangOpts().VScaleMin * 128; + if (Ty->getKind() == BuiltinType::SveBool || + Ty->getKind() == BuiltinType::SveCount) + return (Context.getLangOpts().VScaleMin * 128) / Context.getCharWidth(); + return Context.getLangOpts().VScaleMin * 128; } bool ASTContext::areCompatibleSveTypes(QualType FirstType, @@ -11295,6 +11306,10 @@ Type = Context.getScalableVectorType(ElementType, NumElements); break; } + case 'Q': { + Type = Context.SveCountTy; + break; + } case 'V': { char *End; unsigned NumElements = strtoul(Str, &End, 10); diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3129,6 +3129,12 @@ Out << (type_name == InternalName ? "u" : "") << type_name.size() \ << type_name; \ break; +#define SVE_OPAQUE_TYPE(InternalName, MangledName, Id, SingletonId) \ + case BuiltinType::Id: \ + type_name = MangledName; \ + Out << (type_name == InternalName ? "u" : "") << type_name.size() \ + << type_name; \ + break; #include "clang/Basic/AArch64SVEACLETypes.def" #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -2337,6 +2337,8 @@ case BuiltinType::SveFloat64: case BuiltinType::SveBFloat16: case BuiltinType::SveBool: + case BuiltinType::SveBoolx2: + case BuiltinType::SveBoolx4: return true; default: return false; @@ -3198,6 +3200,13 @@ argSlot[i] = params[i]; } + // Propagate the SME ACLE attributes. + if (epi.AArch64SMEAttributes != SME_NormalFunction) { + auto &ExtraBits = *getTrailingObjects(); + ExtraBits.AArch64SMEAttributes = epi.AArch64SMEAttributes; + } + + // Fill in the exception type array if present. 
if (getExceptionSpecType() == EST_Dynamic) { auto &ExtraBits = *getTrailingObjects(); @@ -3391,6 +3400,8 @@ for (unsigned i = 0; i != NumParams; ++i) ID.AddInteger(epi.ExtParameterInfos[i].getOpaqueValue()); } + ID.AddInteger(epi.AArch64SMEAttributes); + epi.ExtInfo.Profile(ID); ID.AddBoolean(epi.HasTrailingReturn); } diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -915,6 +915,24 @@ FunctionType::ExtInfo Info = T->getExtInfo(); + if ((T->getAArch64SMEAttributes() & + FunctionType::SME_PStateSMCompatibleMask) && + !InsideCCAttribute) + OS << " __attribute__((arm_streaming_compatible))"; + if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) && + !InsideCCAttribute) + OS << " __attribute__((arm_streaming))"; + if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateZANewMask) && + !InsideCCAttribute) + OS << " __attribute__((arm_new_za))"; + if ((T->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) && + !InsideCCAttribute) + OS << " __attribute__((arm_shared_za))"; + if ((T->getAArch64SMEAttributes() & + FunctionType::SME_PStateZAPreservedMask) && + !InsideCCAttribute) + OS << " __attribute__((arm_preserves_za))"; + printFunctionAfter(Info, OS); if (!T->getMethodQuals().empty()) @@ -1792,6 +1810,11 @@ break; } case attr::AArch64VectorPcs: OS << "aarch64_vector_pcs"; break; + case attr::ArmStreaming: OS << "arm_streaming"; break; + case attr::ArmStreamingCompatible: OS << "arm_streaming_compatible"; break; + case attr::ArmNewZA: OS << "arm_new_za"; break; + case attr::ArmPreservesZA: OS << "arm_preserves_za"; break; + case attr::ArmSharedZA: OS << "arm_shared_za"; break; case attr::AArch64SVEPcs: OS << "aarch64_sve_pcs"; break; case attr::AMDGPUKernelCall: OS << "amdgpu_kernel"; break; case attr::IntelOclBicc: OS << "inteloclbicc"; break; diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h --- 
a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -54,6 +54,8 @@ bool HasLSE; bool HasFlagM; bool HasMOPS; + bool HasSME; + bool HasSME2; bool HasRCPC; llvm::AArch64::ArchKind ArchKind; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -33,6 +33,10 @@ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, #include "clang/Basic/BuiltinsSVE.def" +#define BUILTIN(ID, TYPE, ATTRS) \ + {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, +#include "clang/Basic/BuiltinsSME.def" + #define BUILTIN(ID, TYPE, ATTRS) \ {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr}, #define LANGBUILTIN(ID, TYPE, ATTRS, LANG) \ @@ -400,9 +404,8 @@ Builder.defineMacro("__ARM_FEATURE_BF16_SCALAR_ARITHMETIC", "1"); } - if ((FPU & SveMode) && HasBFloat16) { + if ((FPU & SveMode) && HasBFloat16) Builder.defineMacro("__ARM_FEATURE_SVE_BF16", "1"); - } if ((FPU & SveMode) && HasMatmulFP64) Builder.defineMacro("__ARM_FEATURE_SVE_MATMUL_FP64", "1"); @@ -528,6 +531,8 @@ .Cases("aarch64", "arm64", "arm", true) .Case("neon", FPU & NeonMode) .Cases("sve", "sve2", "sve2-bitperm", "sve2-aes", "sve2-sha3", "sve2-sm4", "f64mm", "f32mm", "i8mm", "bf16", FPU & SveMode) + .Case("sme", HasSME) + .Case("sme2", HasSME2) .Case("ls64", HasLS64) .Default(false); } @@ -575,6 +580,8 @@ HasMatmulFP32 = false; HasLSE = false; HasMOPS = false; + HasSME = false; + HasSME2 = false; HasRCPC = false; ArchKind = llvm::AArch64::ArchKind::INVALID; @@ -582,6 +589,19 @@ for (const auto &Feature : Features) { if (Feature == "+neon") FPU |= NeonMode; + if (Feature == "+sme") { + FPU |= SveMode; + HasSME = true; + HasBFloat16 = true; + HasFullFP16 = true; + } + if (Feature == "+sme2") { + FPU |= SveMode; + HasSME = true; + HasSME2 = true; + HasBFloat16 = true; + HasFullFP16 = true; + } if (Feature == "+sve") { FPU |= SveMode; HasFullFP16 = true; diff --git 
a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6598,11 +6598,32 @@ #undef SVEMAP1 #undef SVEMAP2 +#define SMEMAP1(NameBase, LLVMIntrinsic, TypeModifier) \ + { \ +#NameBase, SME::BI__builtin_sme_##NameBase, Intrinsic::LLVMIntrinsic, 0, \ + TypeModifier \ + } + +#define SMEMAP2(NameBase, TypeModifier) \ + { \ +#NameBase, SME::BI__builtin_sme_##NameBase, 0, 0, TypeModifier \ + } + +static const ARMVectorIntrinsicInfo AArch64SMEIntrinsicMap[] = { +#define GET_SME_LLVM_INTRINSIC_MAP +#include "clang/Basic/arm_sme_builtin_cg.inc" +#undef GET_SME_LLVM_INTRINSIC_MAP +}; + +#undef SMEMAP1 +#undef SMEMAP2 + static bool NEONSIMDIntrinsicsProvenSorted = false; static bool AArch64SIMDIntrinsicsProvenSorted = false; static bool AArch64SISDIntrinsicsProvenSorted = false; static bool AArch64SVEIntrinsicsProvenSorted = false; +static bool AArch64SMEIntrinsicsProvenSorted = false; static const ARMVectorIntrinsicInfo * findARMVectorIntrinsicInMap(ArrayRef IntrinsicMap, @@ -8737,6 +8758,8 @@ return Builder.getInt32Ty(); case SVETypeFlags::MemEltTyInt64: return Builder.getInt64Ty(); + case SVETypeFlags::MemEltTyInt128: + return Builder.getInt128Ty(); } llvm_unreachable("Unknown MemEltType"); } @@ -8863,6 +8886,9 @@ // the elements of the specified datatype. 
Value *CodeGenFunction::EmitSVEPredicateCast(Value *Pred, llvm::ScalableVectorType *VTy) { + if (Pred->getType()->isAArch64SvcountTy()) + return Pred; + auto *RTy = llvm::VectorType::get(IntegerType::get(getLLVMContext(), 1), VTy); if (Pred->getType() == RTy) return Pred; @@ -8872,6 +8898,7 @@ switch (VTy->getMinNumElements()) { default: llvm_unreachable("unsupported element count!"); + case 1: case 2: case 4: case 8: @@ -9040,12 +9067,16 @@ unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_ld2_sret: + case Intrinsic::aarch64_sve_ld1_pn_vg2: + case Intrinsic::aarch64_sve_ldnt1_pn_vg2: N = 2; break; case Intrinsic::aarch64_sve_ld3_sret: N = 3; break; case Intrinsic::aarch64_sve_ld4_sret: + case Intrinsic::aarch64_sve_ld1_pn_vg4: + case Intrinsic::aarch64_sve_ldnt1_pn_vg4: N = 4; break; default: @@ -9054,7 +9085,7 @@ auto RetTy = llvm::VectorType::get(VTy->getElementType(), VTy->getElementCount() * N); - Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); + Value *Predicate = EmitSVEPredicateCast(Ops[0], VTy); Value *BasePtr= Builder.CreateBitCast(Ops[1], VecPtrTy); Value *Offset = Ops.size() > 2 ? Ops[2] : Builder.getInt32(0); BasePtr = Builder.CreateGEP(VTy, BasePtr, Offset); @@ -9081,12 +9112,16 @@ unsigned N; switch (IntID) { case Intrinsic::aarch64_sve_st2: + case Intrinsic::aarch64_sve_st1_pn_vg2: + case Intrinsic::aarch64_sve_stnt1_pn_vg2: N = 2; break; case Intrinsic::aarch64_sve_st3: N = 3; break; case Intrinsic::aarch64_sve_st4: + case Intrinsic::aarch64_sve_st1_pn_vg4: + case Intrinsic::aarch64_sve_stnt1_pn_vg4: N = 4; break; default: @@ -9114,6 +9149,7 @@ return Builder.CreateCall(F, Operands); } + // SVE2's svpmullb and svpmullt builtins are similar to the svpmullb_pair and // svpmullt_pair intrinsics, with the exception that their results are bitcast // to a wider type. 
@@ -9280,6 +9316,193 @@ return {DefaultType}; } +static llvm::Value *getSMESliceIndex(CGBuilderTy &Builder, llvm::Value *Base, + llvm::Value *Offset) { + llvm::APInt OffsetVal = cast(Offset)->getValue().trunc(32); + Offset = ConstantInt::get(Base->getType(), OffsetVal); + return Builder.CreateAdd(Base, Offset); +} + +Value *CodeGenFunction::EmitSMELoadStore(const SVETypeFlags &TypeFlags, + unsigned BuiltinID, + SmallVectorImpl &Ops) { + // We support 3 classes of builtins here: + // 1. 4-op forms: + // svldr_vnum_za(slice_base, slice_offset, pn, ptr) + // svstr_vnum_za(slice_base, slice_offset, pn, ptr) + // 2. 5-op forms: + // svld1_(ver|hor)_zaX(tile, slice_base, slice_offset, pn, ptr) + // svst1_(ver|hor)_zaX(tile, slice_base, slice_offset, pn, ptr) + // 3. 6-op forms: + // svld1_(ver|hor)_vnum_zaX(tile, slice_base, slice_offset, pn, ptr, vnum) + // svst1_(ver|hor)_vnum_zaX(tile, slice_base, slice_offset, pn, ptr, vnum) + int NumOps = Ops.size(); + llvm::Value *Ptr = NumOps == 4 ? Ops[3] : Ops[4]; + + if (NumOps != 5) { + // For the 4-op form, slice_offset == vnum. + llvm::Value *VNum = NumOps == 4 ? Ops[1] : Ops[5]; + + Function *F = CGM.getIntrinsic(Intrinsic::aarch64_sme_cntsb); + llvm::Value *SVL = Builder.CreateCall(F); + Value *Offset = + Builder.CreateMul(Builder.CreateZExtOrTrunc(VNum, SVL->getType()), SVL); + + llvm::Type *ByteType = Builder.getInt8Ty(); + Ptr = Builder.CreateGEP(ByteType, Ptr, Offset); + } + + Function *F = CGM.getIntrinsic(BuiltinID); + if (NumOps == 4) { + // Pn is not used at all in the intrinsic because the instruction doesn't + // use it either. This will need to be updated in the specification. 
+ llvm::Value *Slice = getSMESliceIndex(Builder, Ops[0], Ops[1]); + return Builder.CreateCall(F, {Slice, Ptr}); + } + llvm::Value *Slice = getSMESliceIndex(Builder, Ops[1], Ops[2]); + auto *OverloadedTy = + getSVEVectorForElementType(SVEBuiltinMemEltTy(TypeFlags)); + llvm::Value *Pn = EmitSVEPredicateCast(Ops[3], OverloadedTy); + return Builder.CreateCall(F, {Pn, Ptr, /*Tile*/ Ops[0], Slice}); +} + +Value *CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, + const CallExpr *E) { + // Find out if any arguments are required to be integer constant expressions. + unsigned ICEArguments = 0; + ASTContext::GetBuiltinTypeError Error; + getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments); + assert(Error == ASTContext::GE_None && "Should not codegen an error"); + + llvm::SmallVector Ops; + for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) { + if ((ICEArguments & (1 << i)) == 0) { + Value *Arg = EmitScalarExpr(E->getArg(i)); + if (auto *VTy = dyn_cast(Arg->getType())) { + unsigned MinElts = VTy->getMinNumElements(); + bool IsPred = VTy->getElementType()->isIntegerTy(1); + unsigned N = + (MinElts * VTy->getScalarSizeInBits()) / (IsPred ? 16 : 128); + for (unsigned I = 0; I < N; ++I) { + Value *Idx = ConstantInt::get(CGM.Int64Ty, (I * MinElts) / N); + auto *NewVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts / N); + Ops.push_back(Builder.CreateExtractVector(NewVTy, Arg, Idx)); + } + } else + Ops.push_back(Arg); + } else { + // If this is required to be a constant, constant fold it so that we know + // that the generated intrinsic gets a ConstantInt. + Optional Result = + E->getArg(i)->getIntegerConstantExpr(getContext()); + assert(Result && "Expected argument to be a constant"); + + // Immediates for SME llvm intrinsics are always 32bit. We can safely + // truncate because the immediate has been range checked and no valid + // immediate requires more than a handful of bits. 
+ *Result = Result->extOrTrunc(32); + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), *Result)); + } + } + + auto *Builtin = findARMVectorIntrinsicInMap(AArch64SMEIntrinsicMap, BuiltinID, + AArch64SMEIntrinsicsProvenSorted); + SVETypeFlags TypeFlags(Builtin->TypeModifier); + if (TypeFlags.isLoad() || TypeFlags.isStore()) + return EmitSMELoadStore(TypeFlags, Builtin->LLVMIntrinsic, Ops); + + if (TypeFlags.isArmInStreamingMode()) { + // Create call to __arm_sme_state and extract lower bit from X0. + CallInst *CI = EmitRuntimeCall(CGM.CreateRuntimeFunction( + llvm::FunctionType::get(StructType::get(CGM.Int64Ty, CGM.Int64Ty), {}, + false), + "__arm_sme_state")); + auto Attrs = + AttributeList() + .addFnAttribute(getLLVMContext(), "aarch64_pstate_sm_compatible") + .addFnAttribute(getLLVMContext(), "aarch64_pstate_za_preserved"); + CI->setAttributes(Attrs); + CI->setCallingConv( + llvm::CallingConv:: + AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2); + Value *X0 = Builder.CreateExtractValue(CI, 0); + Value *X0Bit0 = Builder.CreateAnd(X0, Builder.getInt64(1)); + return Builder.CreateZExtOrTrunc(X0Bit0, ConvertType(E->getType())); + } + + // Should not happen! + if (Builtin->LLVMIntrinsic == 0) + return nullptr; + + if (TypeFlags.isZASliceBaseOffsetIntr()) { + llvm::Value *SliceBase = Ops[0]; + llvm::Value *SliceOff = Ops[1]; + llvm::Value *Slice = getSMESliceIndex(Builder, SliceBase, SliceOff); + Ops[0] = Slice; + Ops.erase(Ops.begin() + 1); + } + + // Predicates must match the main datatype. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (auto PredTy = dyn_cast(Ops[i]->getType())) + if (PredTy->getElementType()->isIntegerTy(1)) { + PredTy = getSVEType(TypeFlags); + Ops[i] = EmitSVEPredicateCast(Ops[i], PredTy); + } + + if (TypeFlags.isReadZA() || TypeFlags.isWriteZA()) { + llvm::ScalableVectorType *FnType = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(Builtin->LLVMIntrinsic, {FnType}); + + // FIXME! 
We probably shouldn't be adding the slice_base (Op[1]) to the + // slice_offset (Op[2]) here. Instead I think it makes sense to change the + // LLVM intrinsic to match the ACLE intrinsic. + llvm::Value *SliceBase = TypeFlags.isReadZA() ? Ops[3] : Ops[1]; + llvm::Value *SliceOff = TypeFlags.isReadZA() ? Ops[4] : Ops[2]; + llvm::Value *Slice = getSMESliceIndex(Builder, SliceBase, SliceOff); + + if (TypeFlags.isReadZA()) + return Builder.CreateCall( + F, {/*Passthru*/ Ops[0], /*Pg*/ Ops[1], /*Tile*/ Ops[2], Slice}); + else + return Builder.CreateCall( + F, {/*Tile*/ Ops[0], Slice, /*Pg*/ Ops[3], /*Write data*/ Ops[4]}); + } + + Function *F = + TypeFlags.isOverloadNone() + ? CGM.getIntrinsic(Builtin->LLVMIntrinsic) + : CGM.getIntrinsic(Builtin->LLVMIntrinsic, {getSVEType(TypeFlags)}); + Value *Call = Builder.CreateCall(F, Ops); + + // Predicate results must be converted to svbool_t. + if (auto PredTy = dyn_cast(Call->getType())) + if (PredTy->getScalarType()->isIntegerTy(1)) + Call = EmitSVEPredicateCast(Call, ScalableVectorType::get(Builder.getInt1Ty(), 16)); + + // Multi-vector results should be combined into a single (wide) result vector. 
+ if (auto *StructTy = dyn_cast(Call->getType())) { + if (auto *VTy = dyn_cast(StructTy->getTypeAtIndex(0U))) { + unsigned N = StructTy->getNumElements(); + unsigned MinElts = VTy->getMinNumElements(); + ScalableVectorType *WideVTy = + ScalableVectorType::get(VTy->getElementType(), MinElts * N); + + Value *Ret = llvm::PoisonValue::get(WideVTy); + for (unsigned I = 0; I < N; ++I) { + Value *SRet = Builder.CreateExtractValue(Call, I); + assert(SRet->getType() == VTy && "Unexpected type for result value"); + Value *Idx = ConstantInt::get(CGM.Int64Ty, I * MinElts); + Ret = Builder.CreateInsertVector(WideVTy, Ret, SRet, Idx); + } + Call = Ret; + } + } + + return Call; +} + Value *CodeGenFunction::EmitSVETupleSetOrGet(const SVETypeFlags &TypeFlags, llvm::Type *Ty, ArrayRef Ops) { @@ -9654,6 +9877,10 @@ BuiltinID <= clang::AArch64::LastSVEBuiltin) return EmitAArch64SVEBuiltinExpr(BuiltinID, E); + if (BuiltinID >= AArch64::FirstSMEBuiltin && + BuiltinID <= AArch64::LastSMEBuiltin) + return EmitAArch64SMEBuiltinExpr(BuiltinID, E); + unsigned HintID = static_cast(-1); switch (BuiltinID) { default: break; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1765,6 +1765,17 @@ if (!isUnresolvedExceptionSpec(FPT->getExceptionSpecType()) && FPT->isNothrow()) FuncAttrs.addAttribute(llvm::Attribute::NoUnwind); + + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + FuncAttrs.addAttribute("aarch64_pstate_sm_enabled"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + FuncAttrs.addAttribute("aarch64_pstate_sm_compatible"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZANewMask) + FuncAttrs.addAttribute("aarch64_pstate_za_new"); + if (FPT->getAArch64SMEAttributes() & FunctionType::SME_PStateZASharedMask) + FuncAttrs.addAttribute("aarch64_pstate_za_shared"); + if (FPT->getAArch64SMEAttributes() & 
FunctionType::SME_PStateZAPreservedMask) + FuncAttrs.addAttribute("aarch64_pstate_za_preserved"); } static void AddAttributesFromAssumes(llvm::AttrBuilder &FuncAttrs, @@ -2216,6 +2227,24 @@ llvm::toStringRef(CodeGenOpts.UniformWGSize)); } } + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_sm_enabled"); + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_sm_body"); + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_sm_compatible"); + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_za_shared"); + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_za_preserved"); + + if (TargetDecl->hasAttr()) + FuncAttrs.addAttribute("aarch64_pstate_za_new"); } // Attach "no-builtins" attributes to: diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -722,7 +722,12 @@ #include "clang/Basic/AArch64SVEACLETypes.def" { ASTContext::BuiltinVectorTypeInfo Info = - CGM.getContext().getBuiltinVectorTypeInfo(BT); + // The debug info for svcount_t is the same as that for svbool_t + BT->getKind() == BuiltinType::SveCount + ? 
ASTContext::BuiltinVectorTypeInfo( + CGM.getContext().BoolTy, + llvm::ElementCount::getScalable(16), 1) + : CGM.getContext().getBuiltinVectorTypeInfo(BT); unsigned NumElemsPerVG = (Info.EC.getKnownMinValue() * Info.NumVectors) / 2; // Debuggers can't extract 1bit from a vector, so will display a diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4243,6 +4243,10 @@ SmallVectorImpl &Ops, unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitSMELoadStore(const SVETypeFlags &TypeFlags, + unsigned BuiltinID, + SmallVectorImpl &Ops); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1989,6 +1989,12 @@ return; } + // We only need to handle the 'arm_locally_streaming' attribute as a + // special case here (as opposed to e.g. 'arm_streaming'), because it + // is not set from the prototype, but rather from the definition. + if (D->hasAttr()) + B.addAttribute("aarch64_pstate_sm_body"); + // Track whether we need to add the optnone LLVM attribute, // starting with the default for this optimization level. 
bool ShouldAddOptNone = diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -596,6 +596,8 @@ case BuiltinType::SveInt64x4: case BuiltinType::SveUint64x4: case BuiltinType::SveBool: + case BuiltinType::SveBoolx2: + case BuiltinType::SveBoolx4: case BuiltinType::SveFloat16: case BuiltinType::SveFloat16x2: case BuiltinType::SveFloat16x3: @@ -618,6 +620,8 @@ Info.EC.getKnownMinValue() * Info.NumVectors); } + case BuiltinType::SveCount: + return llvm::Type::getAArch64SvcountTy(getLLVMContext()); #define PPC_VECTOR_TYPE(Name, Id, Size) \ case BuiltinType::Id: \ ResultType = \ diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -315,6 +315,8 @@ clang_generate_header(-gen-arm-fp16 arm_fp16.td arm_fp16.h) # Generate arm_sve.h clang_generate_header(-gen-arm-sve-header arm_sve.td arm_sve.h) + # Generate arm_sme.h + clang_generate_header(-gen-arm-sme-header arm_sme.td arm_sme.h) # Generate arm_bf16.h clang_generate_header(-gen-arm-bf16 arm_bf16.td arm_bf16.h) # Generate arm_mve.h @@ -339,6 +341,7 @@ list(APPEND aarch64_only_generated_files "${CMAKE_CURRENT_BINARY_DIR}/arm_sve.h" + "${CMAKE_CURRENT_BINARY_DIR}/arm_sme.h" "${CMAKE_CURRENT_BINARY_DIR}/arm_bf16.h" "${output_dir}/arm_neon_sve_bridge.h" ) diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2783,18 +2783,8 @@ llvm_unreachable("Invalid NeonTypeFlag!"); } -bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { - // Range check SVE intrinsics that take immediate values. 
- SmallVector, 3> ImmChecks; - - switch (BuiltinID) { - default: - return false; -#define GET_SVE_IMMEDIATE_CHECK -#include "clang/Basic/arm_sve_sema_rangechecks.inc" -#undef GET_SVE_IMMEDIATE_CHECK - } - +bool Sema::ParseSVEImmChecks( + CallExpr *TheCall, SmallVector, 3> &ImmChecks) { // Perform all the immediate checks for this builtin call. bool HasError = false; for (auto &I : ImmChecks) { @@ -2898,14 +2888,169 @@ if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 3)) HasError = true; break; + case SVETypeFlags::ImmCheck0: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 0)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_15: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 15)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_255: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 255)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_2_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 2) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_6_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 6) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_14_Mul2: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 14) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 2)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_4_Mul4: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 4) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 4)) + HasError = true; + break; + case SVETypeFlags::ImmCheck0_12_Mul4: + if (SemaBuiltinConstantArgRange(TheCall, ArgNum, 0, 12) || + SemaBuiltinConstantArgMultiple(TheCall, ArgNum, 4)) + HasError = true; + break; } } - return HasError; } +enum ArmStreamingType { + ArmNonStreaming, + ArmStreaming, + ArmStreamingCompatible, + ArmLocallyStreaming +}; + +static ArmStreamingType getArmStreamingFnType(const FunctionDecl *FD) { + if 
(FD->hasAttr()) + return ArmLocallyStreaming; + if (const auto *T = dyn_cast(FD->getType())) { + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMEnabledMask) + return ArmStreaming; + if (T->getAArch64SMEAttributes() & FunctionType::SME_PStateSMCompatibleMask) + return ArmStreamingCompatible; + } + return ArmNonStreaming; +} + +static void checkArmStreamingBuiltin(Sema &S, CallExpr *TheCall, + ArmStreamingType FnType, + ArmStreamingType BuiltinType) { + assert(BuiltinType != ArmLocallyStreaming && + "Unexpected locally_streaming attribute for builtin!"); + if ((FnType == ArmStreaming || FnType == ArmLocallyStreaming) && + BuiltinType == ArmNonStreaming) + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "streaming or locally streaming"; + + if ((FnType == ArmStreamingCompatible) && + BuiltinType != ArmStreamingCompatible) { + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "streaming compatible"; + return; + } + + if (FnType == ArmNonStreaming && BuiltinType == ArmStreaming) + S.Diag(TheCall->getBeginLoc(), diag::warn_attribute_arm_sm_incompat_builtin) + << TheCall->getSourceRange() << "non-streaming"; +} + +bool Sema::CheckSMEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { + ArmStreamingType FnType = getArmStreamingFnType(FD); + Optional BuiltinType = None; + + switch (BuiltinID) { + default: + break; +#define GET_SME_STREAMING_ATTRS +#include "clang/Basic/arm_sme_streaming_attrs.inc" +#undef GET_SME_STREAMING_ATTRS + } + + if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, FnType, *BuiltinType); + } + + // Range check SME intrinsics that take immediate values. 
+ SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: + return false; +#define GET_SME_IMMEDIATE_CHECK +#include "clang/Basic/arm_sme_sema_rangechecks.inc" +#undef GET_SME_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + +bool Sema::CheckSVEBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { + ArmStreamingType SmType = getArmStreamingFnType(FD); + Optional BuiltinType = None; + + switch (BuiltinID) { + default: + break; +#define GET_SVE_STREAMING_ATTRS +#include "clang/Basic/arm_sve_streaming_attrs.inc" +#undef GET_SVE_STREAMING_ATTRS + } + + if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, SmType, *BuiltinType); + } + + // Range check SVE intrinsics that take immediate values. + SmallVector, 3> ImmChecks; + + switch (BuiltinID) { + default: + return false; +#define GET_SVE_IMMEDIATE_CHECK +#include "clang/Basic/arm_sve_sema_rangechecks.inc" +#undef GET_SVE_IMMEDIATE_CHECK + } + + return ParseSVEImmChecks(TheCall, ImmChecks); +} + bool Sema::CheckNeonBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { + if (const FunctionDecl *FD = getCurFunctionDecl()) { + ArmStreamingType SmType = getArmStreamingFnType(FD); + Optional BuiltinType = None; + + switch (BuiltinID) { + default: + break; +#define GET_NEON_STREAMING_COMPAT_FLAG +#include "clang/Basic/arm_neon.inc" +#undef GET_NEON_STREAMING_COMPAT_FLAG + } + + if (BuiltinType) + checkArmStreamingBuiltin(*this, TheCall, SmType, *BuiltinType); + } + llvm::APSInt Result; uint64_t mask = 0; unsigned TV = 0; @@ -3266,6 +3411,9 @@ if (CheckSVEBuiltinFunctionCall(BuiltinID, TheCall)) return true; + if (CheckSMEBuiltinFunctionCall(BuiltinID, TheCall)) + return true; + // For intrinsics which take an immediate value as part of the instruction, // range check them here. 
unsigned i = 0, l = 0, u = 0; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -3743,6 +3743,15 @@ } } + // It is not allowed to redeclare an SME function with different SME + // attributes. + if (IsInvalidSMECallConversion(Old->getType(), New->getType())) { + Diag(New->getLocation(), diag::err_sme_attr_mismatch) + << New->getType() << Old->getType(); + Diag(OldLocation, diag::note_previous_declaration); + return true; + } + // If a function is first declared with a calling convention, but is later // declared or defined without one, all following decls assume the calling // convention of the first. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -275,6 +275,7 @@ if (const auto *A = D->getAttr()) { S.Diag(AL.getLoc(), diag::err_attributes_are_not_compatible) << AL << A; S.Diag(A->getLocation(), diag::note_conflicting_attribute); + AL.setInvalid(); return true; } return false; @@ -5585,6 +5586,14 @@ BuiltinID <= AArch64::LastSVEBuiltin; } +static bool ArmSmeAliasValid(ASTContext &Context, unsigned BuiltinID, + StringRef AliasName) { + if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID)) + BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID); + return BuiltinID >= AArch64::FirstSMEBuiltin && + BuiltinID <= AArch64::LastSMEBuiltin; +} + static void handleArmBuiltinAliasAttr(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.isArgIdent(0)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) @@ -5597,7 +5606,8 @@ StringRef AliasName = cast(D)->getIdentifier()->getName(); bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64(); - if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) || + if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName) && + !ArmSmeAliasValid(S.Context, BuiltinID, AliasName)) || (!IsAArch64 && 
!ArmMveAliasValid(BuiltinID, AliasName) && !ArmCdeAliasValid(BuiltinID, AliasName))) { S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias); @@ -9203,6 +9213,10 @@ handleArmBuiltinAliasAttr(S, D, AL); break; + case ParsedAttr::AT_ArmLocallyStreaming: + handleSimpleAttribute(S, D, AL); + break; + case ParsedAttr::AT_AcquireHandle: handleAcquireHandleAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -17660,6 +17660,14 @@ } } + // SME attributes must match when overriding a function declaration. + if (IsInvalidSMECallConversion(Old->getType(), New->getType())) { + Diag(New->getLocation(), diag::err_conflicting_overriding_attributes) + << New->getDeclName() << New->getType() << Old->getType(); + Diag(Old->getLocation(), diag::note_overridden_virtual_function); + return true; + } + // Virtual overrides must have the same code_seg. const auto *OldCSA = Old->getAttr(); const auto *NewCSA = New->getAttr(); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -9216,6 +9216,20 @@ ColonLoc, result, VK, OK); } +// Check that the SME attributes for PSTATE.ZA and PSTATE.SM are compatible. +bool Sema::IsInvalidSMECallConversion(QualType FromType, QualType ToType) { + if (const auto *ToFn = + dyn_cast(Context.getCanonicalType(ToType))) + if (const auto *FromFn = + dyn_cast(Context.getCanonicalType(FromType))) + return (ToFn->getAArch64SMEAttributes() & + FunctionType::SME_AttributeMask) != + (FromFn->getAArch64SMEAttributes() & + FunctionType::SME_AttributeMask); + + return false; +} + // Check if we have a conversion between incompatible cmse function pointer // types, that is, a conversion between a function pointer with the // cmse_nonsecure_call attribute and one without. 
@@ -9374,6 +9388,8 @@ return Sema::IncompatibleFunctionPointer; if (IsInvalidCmseNSCallConversion(S, ltrans, rtrans)) return Sema::IncompatibleFunctionPointer; + if (S.IsInvalidSMECallConversion(ltrans, rtrans)) + return Sema::IncompatibleFunctionPointer; return ConvTy; } diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -138,6 +138,11 @@ case ParsedAttr::AT_NoReturn: \ case ParsedAttr::AT_Regparm: \ case ParsedAttr::AT_CmseNSCall: \ + case ParsedAttr::AT_ArmStreaming: \ + case ParsedAttr::AT_ArmStreamingCompatible: \ + case ParsedAttr::AT_ArmNewZA: \ + case ParsedAttr::AT_ArmSharedZA: \ + case ParsedAttr::AT_ArmPreservesZA: \ case ParsedAttr::AT_AnyX86NoCallerSavedRegisters: \ case ParsedAttr::AT_AnyX86NoCfCheck: \ CALLING_CONV_ATTRS_CASELIST @@ -7640,6 +7645,24 @@ llvm_unreachable("unexpected attribute kind!"); } +static bool checkMutualExclusion(TypeProcessingState &state, + const FunctionProtoType::ExtProtoInfo &EPI, + ParsedAttr &Attr, + AttributeCommonInfo::Kind OtherKind) { + auto OtherAttr = std::find_if( + state.getCurrentAttributes().begin(), state.getCurrentAttributes().end(), + [OtherKind](const ParsedAttr &A) { return A.getKind() == OtherKind; }); + if (OtherAttr == state.getCurrentAttributes().end() || OtherAttr->isInvalid()) + return false; + + Sema &S = state.getSema(); + S.Diag(Attr.getLoc(), diag::err_attributes_are_not_compatible) + << OtherAttr->getAttrName() << Attr; + S.Diag(OtherAttr->getLoc(), diag::note_conflicting_attribute); + Attr.setInvalid(); + return true; +} + /// Process an individual function attribute. Returns true to /// indicate that the attribute was handled, false if it wasn't. 
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr, @@ -7769,6 +7792,65 @@ return true; } + if (attr.getKind() == ParsedAttr::AT_ArmStreaming || + attr.getKind() == ParsedAttr::AT_ArmStreamingCompatible || + attr.getKind() == ParsedAttr::AT_ArmNewZA || + attr.getKind() == ParsedAttr::AT_ArmSharedZA || + attr.getKind() == ParsedAttr::AT_ArmPreservesZA){ + if (S.CheckAttrTarget(attr) || S.CheckAttrNoArgs(attr)) + return true; + + if (!unwrapped.isFunctionType()) + return false; + + const FunctionProtoType *FnTy = unwrapped.get()->getAs(); + if (!FnTy) { + // SME ACLE attributes are not supported on K&R-style unprototyped C + // functions. + S.Diag(attr.getLoc(), diag::warn_attribute_ignored) << attr; + attr.setInvalid(); + return false; + } + + FunctionProtoType::ExtProtoInfo EPI = FnTy->getExtProtoInfo(); + switch (attr.getKind()) { + case ParsedAttr::AT_ArmStreaming: + if (checkMutualExclusion(state, EPI, attr, + ParsedAttr::AT_ArmStreamingCompatible)) + return true; + EPI.setArmSMEAttribute(FunctionType::SME_PStateSMEnabledMask); + break; + case ParsedAttr::AT_ArmStreamingCompatible: + if (checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmStreaming)) + return true; + EPI.setArmSMEAttribute(FunctionType::SME_PStateSMCompatibleMask); + break; + case ParsedAttr::AT_ArmNewZA: + if (checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmSharedZA) || + checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmPreservesZA)) + return true; + EPI.setArmSMEAttribute(FunctionType::SME_PStateZANewMask); + break; + case ParsedAttr::AT_ArmSharedZA: + if (checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmNewZA)) + return true; + EPI.setArmSMEAttribute(FunctionType::SME_PStateZASharedMask); + break; + case ParsedAttr::AT_ArmPreservesZA: + if (checkMutualExclusion(state, EPI, attr, ParsedAttr::AT_ArmNewZA)) + return true; + EPI.setArmSMEAttribute(FunctionType::SME_PStateZAPreservedMask); + break; + default: + 
llvm_unreachable("Unsupported attribute"); + } + + QualType newtype = S.Context.getFunctionType(FnTy->getReturnType(), + FnTy->getParamTypes(), EPI); + type = unwrapped.wrap(S, newtype->getAs()); + return true; + } + if (attr.getKind() == ParsedAttr::AT_NoThrow) { // Delay if this is not a function type. if (!unwrapped.isFunctionType()) diff --git a/clang/test/AST/ast-dump-aarch64-sve-types.c b/clang/test/AST/ast-dump-aarch64-sve-types.c --- a/clang/test/AST/ast-dump-aarch64-sve-types.c +++ b/clang/test/AST/ast-dump-aarch64-sve-types.c @@ -47,3 +47,6 @@ // CHECK: TypedefDecl {{.*}} implicit __SVBool_t '__SVBool_t' // CHECK-NEXT: -BuiltinType {{.*}} '__SVBool_t' + +// CHECK: TypedefDecl {{.*}} implicit __SVCount_t '__SVCount_t' +// CHECK-NEXT: -BuiltinType {{.*}} '__SVCount_t' diff --git a/clang/test/CodeGen/aarch64-debug-sve-vector-types.c b/clang/test/CodeGen/aarch64-debug-sve-vector-types.c --- a/clang/test/CodeGen/aarch64-debug-sve-vector-types.c +++ b/clang/test/CodeGen/aarch64-debug-sve-vector-types.c @@ -9,6 +9,9 @@ // CHECK-DAG: ![[REALELTS1_64]] = !DISubrange(lowerBound: 0, upperBound: !DIExpression(DW_OP_constu, 1, DW_OP_bregx, 46, 0, DW_OP_mul, DW_OP_constu, 1, DW_OP_minus)) __SVBool_t b8; + // CHECK-DAG: name: "__SVCount_t",{{.*}}, baseType: ![[CT1]] + __SVCount_t c8; + // CHECK-DAG: name: "__SVInt8_t",{{.*}}, baseType: ![[CT8:[0-9]+]] // CHECK-DAG: ![[CT8]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTYS8:[0-9]+]], flags: DIFlagVector, elements: ![[ELTS8:[0-9]+]]) // CHECK-DAG: ![[ELTTYS8]] = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) diff --git a/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c b/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c --- a/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c +++ b/clang/test/CodeGen/aarch64-debug-sve-vectorx2-types.c @@ -64,4 +64,9 @@ // CHECK-DAG: ![[CT64]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY64:[0-9]+]], flags: DIFlagVector, 
elements: ![[ELTS1x2_64]]) // CHECK-DAG: ![[ELTTY64]] = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) __clang_svfloat64x2_t f64; + + // CHECK: name: "__clang_svboolx2_t", {{.*}}, baseType: ![[CT80:[0-9]+]]) + // CHECK-DAG: ![[CT80]] = !DICompositeType(tag: DW_TAG_array_type, baseType: ![[ELTTY8]], flags: DIFlagVector, elements: ![[ELTS1x2_64]]) + __clang_svboolx2_t i1_t2; + } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.c b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.c @@ -0,0 +1,195 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ +// RUN: -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s + +extern int normal_callee(); + +// == FUNCTION DECLARATIONS == + +__attribute__((arm_streaming)) int streaming_decl(void); +__attribute__((arm_streaming_compatible)) int streaming_compatible_decl(void); +__attribute__((arm_locally_streaming)) int locally_streaming_decl(void); +__attribute__((arm_shared_za)) int shared_za_decl(void); +__attribute__((arm_preserves_za)) int preserves_za_decl(void); +__attribute__((arm_new_za)) int new_za_decl(void); + +// == FUNCTION DEFINITIONS == + +// CHECK-LABEL: @streaming_caller({{.*}}#0 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_streaming)) int streaming_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @streaming_callee({{.*}}#0 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @streaming_decl() #[[ATTR15:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_streaming)) int streaming_callee(void) { + return streaming_decl(); +} + +// CHECK: declare i32 @streaming_decl(){{.*}}#2 + +// CHECK-LABEL: 
@streaming_compatible_caller({{.*}}#3 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_streaming_compatible)) int streaming_compatible_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @streaming_compatible_callee({{.*}}#3 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @streaming_compatible_decl() #[[ATTR16:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_streaming_compatible)) int streaming_compatible_callee(void) { + return streaming_compatible_decl(); +} + +// CHECK: declare i32 @streaming_compatible_decl(){{.*}}#4 + +// CHECK-LABEL: @locally_streaming_caller({{.*}}#5 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_locally_streaming)) int locally_streaming_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @locally_streaming_callee({{.*}}#5 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @locally_streaming_decl() #[[ATTR17:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_locally_streaming)) int locally_streaming_callee(void) { + return locally_streaming_decl(); +} + +// CHECK: declare i32 @locally_streaming_decl(){{.*}}#6 + +// CHECK-LABEL: @shared_za_caller({{.*}}#7 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_shared_za)) int shared_za_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @shared_za_callee({{.*}}#7 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @shared_za_decl() #[[ATTR18:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_shared_za)) int shared_za_callee(void) { + return shared_za_decl(); +} + +// CHECK: declare i32 @shared_za_decl(){{.*}}#8 + +// CHECK-LABEL: 
@preserves_za_caller({{.*}}#9 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_preserves_za)) int preserves_za_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @preserves_za_callee({{.*}}#9 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @preserves_za_decl() #[[ATTR19:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_preserves_za)) int preserves_za_callee(void) { + return preserves_za_decl(); +} + +// CHECK: declare i32 @preserves_za_decl(){{.*}}#10 + +// CHECK-LABEL: @new_za_caller({{.*}}#11 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @normal_callee() #[[ATTR14]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_new_za)) int new_za_caller(void) { + return normal_callee(); +} + +// CHECK-LABEL: @new_za_callee({{.*}}#11 +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call i32 @new_za_decl() #[[ATTR20:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL]] +// +__attribute__((arm_new_za)) int new_za_callee(void) { + return new_za_decl(); +} + +// CHECK: declare i32 @new_za_decl(){{.*}}#12 + +// Ensure that the attributes are correctly propagated to function types +// and also to callsites. 
+typedef void __attribute__((arm_streaming)) (*s_ptrty) (int, int); +typedef void __attribute__((arm_streaming_compatible)) (*sc_ptrty) (int, int); +typedef void __attribute__((arm_new_za)) (*nz_ptrty) (int, int); +typedef void __attribute__((arm_shared_za)) (*sz_ptrty) (int, int); +typedef void __attribute__((arm_preserves_za)) (*pz_ptrty) (int, int); + +// CHECK-LABEL: @test_streaming_ptrty( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR15]] +// CHECK-NEXT: ret void +// +void test_streaming_ptrty(s_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_streaming_compatible_ptrty( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR16]] +// CHECK-NEXT: ret void +// +void test_streaming_compatible_ptrty(sc_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_new_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR20]] +// CHECK-NEXT: ret void +// +void __attribute__((arm_shared_za)) test_new_za(nz_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_shared_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR18]] +// CHECK-NEXT: ret void +// +void __attribute__((arm_shared_za)) test_shared_za(sz_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_preserved_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR19]] +// CHECK-NEXT: ret void +// +void __attribute__((arm_shared_za)) test_preserved_za(pz_ptrty f, int x, int y) { return f(x, y); } + + +// CHECK: attributes #0 = { {{.*}}"aarch64_pstate_sm_enabled"{{.*}} } +// CHECK: attributes #2 = { {{.*}}"aarch64_pstate_sm_enabled"{{.*}} } +// CHECK: attributes #3 = { {{.*}}"aarch64_pstate_sm_compatible"{{.*}} } +// CHECK: attributes #4 = { 
{{.*}}"aarch64_pstate_sm_compatible"{{.*}} } +// CHECK: attributes #5 = { {{.*}}"aarch64_pstate_sm_body"{{.*}} } +// CHECK: attributes #6 = { {{.*}}"aarch64_pstate_sm_body"{{.*}} } +// CHECK: attributes #7 = { {{.*}}"aarch64_pstate_za_shared"{{.*}} } +// CHECK: attributes #8 = { {{.*}}"aarch64_pstate_za_shared"{{.*}} } +// CHECK: attributes #9 = { {{.*}}"aarch64_pstate_za_preserved"{{.*}} } +// CHECK: attributes #10 = { {{.*}}"aarch64_pstate_za_preserved"{{.*}} } +// CHECK: attributes #11 = { {{.*}}"aarch64_pstate_za_new"{{.*}} } +// CHECK: attributes #12 = { {{.*}}"aarch64_pstate_za_new"{{.*}} } +// CHECK: attributes #15 = { {{.*}}"aarch64_pstate_sm_enabled"{{.*}} } +// CHECK: attributes #16 = { {{.*}}"aarch64_pstate_sm_compatible"{{.*}} } +// CHECK: attributes #17 = { {{.*}}"aarch64_pstate_sm_body"{{.*}} } +// CHECK: attributes #18 = { {{.*}}"aarch64_pstate_za_shared"{{.*}} } +// CHECK: attributes #19 = { {{.*}}"aarch64_pstate_za_preserved"{{.*}} } +// CHECK: attributes #20 = { {{.*}}"aarch64_pstate_za_new"{{.*}} } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/aarch64-sme-attrs.cpp @@ -0,0 +1,280 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme \ +// RUN: -S -O0 -Werror -emit-llvm -o - %s | FileCheck %s + +extern "C" { + +extern int normal_callee(); + +// == FUNCTION DECLARATIONS == + +__attribute__((arm_streaming)) int streaming_decl(); +__attribute__((arm_streaming_compatible)) int streaming_compatible_decl(); +__attribute__((arm_shared_za)) int shared_za_decl(); +__attribute__((arm_preserves_za)) int preserves_za_decl(); +__attribute__((arm_new_za)) int new_za_decl(); + +// == FUNCTION DEFINITIONS == + +// CHECK-LABEL: @streaming_caller() +// CHECK-SAME: #[[SM_ENABLED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_streaming)) int 
streaming_caller() { + return normal_callee(); +} + +// CHECK: declare i32 @normal_callee() #[[NORMAL_DECL:[0-9]+]] + + +// CHECK-LABEL: @streaming_callee() +// CHECK-SAME: #[[SM_ENABLED]] +// CHECK: call i32 @streaming_decl() #[[SM_ENABLED_CALL:[0-9]+]] +// +__attribute__((arm_streaming)) int streaming_callee() { + return streaming_decl(); +} + +// CHECK: declare i32 @streaming_decl() #[[SM_ENABLED_DECL:[0-9]+]] + +// CHECK-LABEL: @streaming_compatible_caller() +// CHECK-SAME: #[[SM_COMPATIBLE:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_streaming_compatible)) int streaming_compatible_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @streaming_compatible_callee() +// CHECK-SAME: #[[SM_COMPATIBLE]] +// CHECK: call i32 @streaming_compatible_decl() #[[SM_COMPATIBLE_CALL:[0-9]+]] +// +__attribute__((arm_streaming_compatible)) int streaming_compatible_callee() { + return streaming_compatible_decl(); +} + +// CHECK: declare i32 @streaming_compatible_decl() #[[SM_COMPATIBLE_DECL:[0-9]+]] + +// CHECK-LABEL: @locally_streaming_caller() +// CHECK-SAME: #[[SM_BODY:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_locally_streaming)) int locally_streaming_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @locally_streaming_callee() +// CHECK-SAME: #[[SM_BODY]] +// CHECK: call i32 @locally_streaming_caller() #[[SM_BODY_CALL:[0-9]+]] +// +__attribute__((arm_locally_streaming)) int locally_streaming_callee() { + return locally_streaming_caller(); +} + + +// CHECK-LABEL: @shared_za_caller() +// CHECK-SAME: #[[ZA_SHARED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_shared_za)) int shared_za_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @shared_za_callee() +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call i32 @shared_za_decl() #[[ZA_SHARED_CALL:[0-9]+]] +// +__attribute__((arm_shared_za)) int shared_za_callee() { + return shared_za_decl(); +} + +// CHECK: declare i32 @shared_za_decl() 
#[[ZA_SHARED_DECL:[0-9]+]] + + +// CHECK-LABEL: @preserves_za_caller() +// CHECK-SAME: #[[ZA_PRESERVED:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_preserves_za)) int preserves_za_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @preserves_za_callee() +// CHECK-SAME: #[[ZA_PRESERVED]] +// CHECK: call i32 @preserves_za_decl() #[[ZA_PRESERVED_CALL:[0-9]+]] +// +__attribute__((arm_preserves_za)) int preserves_za_callee() { + return preserves_za_decl(); +} + +// CHECK: declare i32 @preserves_za_decl() #[[ZA_PRESERVED_DECL:[0-9]+]] + + +// CHECK-LABEL: @new_za_caller() +// CHECK-SAME: #[[ZA_NEW:[0-9]+]] +// CHECK: call i32 @normal_callee() +// +__attribute__((arm_new_za)) int new_za_caller() { + return normal_callee(); +} + +// CHECK-LABEL: @new_za_callee() +// CHECK-SAME: #[[ZA_NEW]] +// CHECK: call i32 @new_za_decl() #[[ZA_NEW_CALL:[0-9]+]] +// +__attribute__((arm_new_za)) int new_za_callee() { + return new_za_decl(); +} + +// CHECK: declare i32 @new_za_decl() #[[ZA_NEW_DECL:[0-9]+]] + + +// Ensure that the attributes are correctly propagated to function types +// and also to callsites. 
+typedef void __attribute__((arm_streaming)) (*s_ptrty) (int, int); +typedef void __attribute__((arm_streaming_compatible)) (*sc_ptrty) (int, int); +typedef void __attribute__((arm_new_za)) (*nz_ptrty) (int, int); +typedef void __attribute__((arm_shared_za)) (*sz_ptrty) (int, int); +typedef void __attribute__((arm_preserves_za)) (*pz_ptrty) (int, int); + +// CHECK-LABEL: @test_streaming_ptrty( +// CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_ENABLED_CALL]] +// +void test_streaming_ptrty(s_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_streaming_compatible_ptrty( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_COMPATIBLE_CALL]] +// +void test_streaming_compatible_ptrty(sc_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_new_za( +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_NEW_CALL]] +// +void __attribute__((arm_shared_za)) test_new_za(nz_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_shared_za( +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_SHARED_CALL]] +// +void __attribute__((arm_shared_za)) test_shared_za(sz_ptrty f, int x, int y) { return f(x, y); } +// CHECK-LABEL: @test_preserved_za( +// CHECK-SAME: #[[ZA_SHARED]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ZA_PRESERVED_CALL]] +// +void __attribute__((arm_shared_za)) test_preserved_za(pz_ptrty f, int x, int y) { return f(x, y); } + +// CHECK-LABEL: @test_indirect_streaming_ptrty( +// CHECK-SAME: #[[NORMAL_DEF:[0-9]+]] +// CHECK: call void [[F:%.*]](i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[SM_ENABLED_CALL]] +// +typedef s_ptrty **indirect_s_ptrty; +void test_indirect_streaming_ptrty(indirect_s_ptrty fptr, int x, int y) { return (**fptr)(x, 
y); } +} // extern "C" + +// +// Test that having the attribute in different places (on declaration and on type) +// both results in the attribute being applied to the type. +// + +// CHECK-LABEL: @_Z24test_same_type_streamingv( +// CHECK: call void @_Z10streaming1v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z10streaming2v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z10streaming3v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z20same_type_streaming1v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z20same_type_streaming2v() #[[SM_ENABLED_CALL]] +// CHECK: call void @_Z20same_type_streaming3v() #[[SM_ENABLED_CALL]] +// CHECK: ret void +// CHECK: } +// CHECK: declare void @_Z10streaming1v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z10streaming2v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z10streaming3v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z20same_type_streaming1v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z20same_type_streaming2v() #[[SM_ENABLED_DECL]] +// CHECK: declare void @_Z20same_type_streaming3v() #[[SM_ENABLED_DECL]] +__attribute__((arm_streaming)) void streaming1(); +void __attribute__((arm_streaming)) streaming2(); +void streaming3() __attribute__((arm_streaming)); +decltype(streaming1) same_type_streaming1; +decltype(streaming2) same_type_streaming2; +decltype(streaming3) same_type_streaming3; +void test_same_type_streaming() { + streaming1(); + streaming2(); + streaming3(); + same_type_streaming1(); + same_type_streaming2(); + same_type_streaming3(); +} + +// +// Test overloading; the attribute is not required for overloaded types and +// does not apply if not specified. 
+// + +// CHECK-LABEL: @_Z12overloadedfni( +// CHECK-SAME: #[[SM_ENABLED]] +int __attribute__((arm_streaming)) overloadedfn(int x) { return x; } +// CHECK-LABEL: @_Z12overloadedfnf( +// CHECK-SAME: #[[NORMAL_DEF]] +// +float overloadedfn(float x) { return x; } +// CHECK-LABEL: @_Z13test_overloadi( +// CHECK-SAME: #[[NORMAL_DEF]] +// +int test_overload(int x) { return overloadedfn(x); } +// CHECK-LABEL: @_Z13test_overloadf( +// CHECK-SAME: #[[NORMAL_DEF]] +// +float test_overload(float x) { return overloadedfn(x); } + +// CHECK-LABEL: @_Z11test_lambdai( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call noundef i32 @"_ZZ11test_lambdaiENK3$_0clEi"({{.*}}) #[[SM_ENABLED_CALL]] +// +// CHECK: @"_ZZ11test_lambdaiENK3$_0clEi"( +// CHECK-SAME: #[[SM_ENABLED]] +int test_lambda(int x) { + auto F = [](int x) __attribute__((arm_streaming)) { return x; }; + return F(x); +} + +// CHECK-LABEL: @_Z27test_template_instantiationv( +// CHECK-SAME: #[[NORMAL_DEF]] +// CHECK: call noundef i32 @_Z15template_functyIiET_S0_(i32 noundef 12) #[[SM_ENABLED_CALL]] +// +// CHECK: @_Z15template_functyIiET_S0_( +// CHECK-SAME: #[[SM_ENABLED]] +template +Ty template_functy(Ty x) __attribute__((arm_streaming)) { return x; } +int test_template_instantiation() { return template_functy(12); } + +// +// Test that arm_locally_streaming is inherited by future redeclarations, +// even when they don't specify the attribute. 
+// + +// CHECK: define {{.*}} @_Z25locally_streaming_inheritv( +// CHECK-SAME: #[[SM_BODY]] +__attribute__((arm_locally_streaming)) void locally_streaming_inherit(); +void locally_streaming_inherit() { + streaming_decl(); +} + +// CHECK: attributes #[[SM_ENABLED]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_enabled" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[NORMAL_DECL]] = { "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_ENABLED_DECL]] = { "aarch64_pstate_sm_enabled" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_COMPATIBLE]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_compatible" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_COMPATIBLE_DECL]] = { "aarch64_pstate_sm_compatible" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_BODY]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_sm_body" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_SHARED]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_za_shared" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_SHARED_DECL]] = { "aarch64_pstate_za_shared" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" 
"target-features"="+sme" } +// CHECK: attributes #[[ZA_PRESERVED]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_za_preserved" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_PRESERVED_DECL]] = { "aarch64_pstate_za_preserved" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_NEW]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "aarch64_pstate_za_new" "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[ZA_NEW_DECL]] = { "aarch64_pstate_za_new" "frame-pointer"="none" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[NORMAL_DEF]] = { mustprogress noinline nounwind optnone vscale_range(1,16) "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+sme" } +// CHECK: attributes #[[SM_ENABLED_CALL]] = { "aarch64_pstate_sm_enabled" } +// CHECK: attributes #[[SM_COMPATIBLE_CALL]] = { "aarch64_pstate_sm_compatible" } +// CHECK: attributes #[[SM_BODY_CALL]] = { "aarch64_pstate_sm_body" } +// CHECK: attributes #[[ZA_SHARED_CALL]] = { "aarch64_pstate_za_shared" } +// CHECK: attributes #[[ZA_PRESERVED_CALL]] = { "aarch64_pstate_za_preserved" } +// CHECK: attributes #[[ZA_NEW_CALL]] = { "aarch64_pstate_za_new" } diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add.c @@ -0,0 +1,180 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: 
%clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == HORIZONTAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddha_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za32_s32u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + svaddha_za32_s32(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddha_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za32_u32u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + svaddha_za32_u32(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddha_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za64_s64u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + svaddha_za64_s64(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddha_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z21test_svaddha_za64_u64u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + svaddha_za64_u64(3, pn, pm, zn); +} + +// == VERTICAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddva_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za32_s32u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + svaddva_za32_s32(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddva_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], 
[[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za32_u32u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + svaddva_za32_u32(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddva_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za64_s64u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + svaddva_za64_s64(3, pn, pm, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svaddva_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.addva.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za64_u64u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + svaddva_za64_u64(3, pn, pm, zn); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_cnt.c @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_svcntsb( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z12test_svcntsbv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +__attribute__((arm_streaming_compatible)) +uint64_t test_svcntsb(void) { + return svcntsb(); +} + +// CHECK-LABEL: @test_svcntsh( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh() +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z12test_svcntshv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsh() +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +__attribute__((arm_streaming_compatible)) +uint64_t test_svcntsh(void) { + return svcntsh(); +} + +// CHECK-LABEL: @test_svcntsw( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw() +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z12test_svcntswv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsw() +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +__attribute__((arm_streaming_compatible)) +uint64_t test_svcntsw(void) { + return svcntsw(); +} + +// CHECK-LABEL: @test_svcntsd( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CHECK-NEXT: ret i64 [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z12test_svcntsdv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsd() +// CPP-CHECK-NEXT: ret i64 [[TMP0]] +// +__attribute__((arm_streaming_compatible)) +uint64_t test_svcntsd(void) { + return svcntsd(); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_loads.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_loads.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_loads.c @@ -0,0 +1,498 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == HORIZONTAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_za8( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svld1_hor_za8ju10__SVBool_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_za8(uint32_t slice_base, svbool_t pg, const int8_t *base) { + svld1_hor_za8(0, slice_base, 15, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za16ju10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_za16(uint32_t slice_base, svbool_t pg, const int16_t *base) { + svld1_hor_za16(1, slice_base, 7, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CHECK-NEXT: ret 
void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za32ju10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_za32(uint32_t slice_base, svbool_t pg, const int32_t *base) { + svld1_hor_za32(3, slice_base, 3, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_hor_za64ju10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_za64(uint32_t slice_base, svbool_t pg, const int64_t *base) { + svld1_hor_za64(7, slice_base, 1, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svld1_hor_za128ju10__SVBool_tPKn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_za128(uint32_t slice_base, svbool_t pg, const __int128_t *base) { + svld1_hor_za128(15, slice_base, 0, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svld1_hor_vnum_za8ju10__SVBool_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.horiz( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, const int8_t *base) { + svld1_hor_vnum_za8(0, slice_base, 15, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za16ju10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.horiz( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, const int16_t *base) { + svld1_hor_vnum_za16(1, slice_base, 7, pg, base, 3); +} + +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za32ju10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.horiz( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, const int32_t *base) { + svld1_hor_vnum_za32(3, slice_base, 3, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_hor_vnum_za64ju10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.horiz( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, const int64_t *base) { + svld1_hor_vnum_za64(7, slice_base, 1, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_hor_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: 
[[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svld1_hor_vnum_za128ju10__SVBool_tPKn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.horiz( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, const __int128_t *base) { + svld1_hor_vnum_za128(15, slice_base, 0, pg, base, 3); +} + +// == VERTICAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svld1_ver_za8ju10__SVBool_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_za8(uint32_t slice_base, svbool_t pg, const int8_t *base) { + svld1_ver_za8(0, slice_base, 15, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za16ju10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_za16(uint32_t slice_base, svbool_t pg, const int16_t *base) { + svld1_ver_za16(1, slice_base, 7, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za32ju10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_za32(uint32_t slice_base, svbool_t pg, const int32_t *base) { + svld1_ver_za32(3, slice_base, 3, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// 
CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svld1_ver_za64ju10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_za64(uint32_t slice_base, svbool_t pg, const int64_t *base) { + svld1_ver_za64(7, slice_base, 1, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svld1_ver_za128ju10__SVBool_tPKn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_za128(uint32_t slice_base, svbool_t pg, const __int128_t *base) { + svld1_ver_za128(15, slice_base, 0, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr 
[[TMP2]], i32 0, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svld1_ver_vnum_za8ju10__SVBool_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1b.vert( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, const int8_t *base) { + svld1_ver_vnum_za8(0, slice_base, 15, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za16ju10__SVBool_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1h.vert( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svld1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, const int16_t *base) { + svld1_ver_vnum_za16(1, slice_base, 7, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za32ju10__SVBool_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1w.vert( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, const int32_t *base) { + svld1_ver_vnum_za32(3, slice_base, 3, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( 
[[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svld1_ver_vnum_za64ju10__SVBool_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1d.vert( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, const int64_t *base) { + svld1_ver_vnum_za64(7, slice_base, 1, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svld1_ver_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svld1_ver_vnum_za128ju10__SVBool_tPKn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ld1q.vert( [[TMP3]], ptr [[TMP2]], i32 15, i32 
[[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svld1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, const __int128_t *base) { + svld1_ver_vnum_za128(15, slice_base, 0, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svldr_vnum_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 15 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TMP3]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svldr_vnum_zaju10__SVBool_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 15 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.ldr(i32 [[TMP3]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svldr_vnum_za(uint32_t slice_base, svbool_t pg, const uint8_t *base) { + svldr_vnum_za(slice_base, 15, pg, base); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mem_ops.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mem_ops.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mem_ops.c @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s 
-check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_arm_sc_memcpy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret ptr [[CALL]] +// +// CPP-CHECK-LABEL: @_Z18test_arm_sc_memcpyPvPKvm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memcpy(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2:[0-9]+]] +// CPP-CHECK-NEXT: ret ptr [[CALL]] +// +__attribute__((arm_streaming_compatible)) +void *test_arm_sc_memcpy(void *dest, const void *src, size_t n) { + return __arm_sc_memcpy(dest, src, n); +} + +// CHECK-LABEL: @test_arm_sc_memmove( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CHECK-NEXT: ret ptr [[CALL]] +// +// CPP-CHECK-LABEL: @_Z19test_arm_sc_memmovePvPKvm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memmove(ptr noundef [[DEST:%.*]], ptr noundef [[SRC:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CPP-CHECK-NEXT: ret ptr [[CALL]] +// +__attribute__((arm_streaming_compatible)) +void *test_arm_sc_memmove(void *dest, const void *src, size_t n) { + return __arm_sc_memmove(dest, src, n); +} + +// CHECK-LABEL: @test_arm_sc_memset( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[DEST:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CHECK-NEXT: ret ptr [[CALL]] +// +// CPP-CHECK-LABEL: @_Z18test_arm_sc_memsetPvim( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memset(ptr noundef [[DEST:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CPP-CHECK-NEXT: ret ptr [[CALL]] +// +__attribute__((arm_streaming_compatible)) +void *test_arm_sc_memset(void *dest, int c, 
size_t n) { + return __arm_sc_memset(dest, c, n); +} + +// CHECK-LABEL: @test_arm_sc_memchr( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[DEST:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CHECK-NEXT: ret ptr [[CALL]] +// +// CPP-CHECK-LABEL: @_Z18test_arm_sc_memchrPvim( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CALL:%.*]] = tail call ptr @__arm_sc_memchr(ptr noundef [[DEST:%.*]], i32 noundef [[C:%.*]], i64 noundef [[N:%.*]]) #[[ATTR2]] +// CPP-CHECK-NEXT: ret ptr [[CALL]] +// +__attribute__((arm_streaming_compatible)) +void *test_arm_sc_memchr(void *dest, int c, size_t n) { + return __arm_sc_memchr(dest, c, n); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mop.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mop.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mop.c @@ -0,0 +1,408 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -D__ARM_FEATURE_SME_F64F64=1 -S -O1 -emit-llvm \ +// RUN: -triple aarch64-none-linux-gnu -target-feature +sme -Werror -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -D__ARM_FEATURE_SME_F64F64=1 -S -O1 -emit-llvm \ +// RUN: -triple aarch64-none-linux-gnu -target-feature +sme -Werror -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == MOPA / MOPS == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) 
+// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmopa_za32_bf16u10__SVBool_tu10__SVBool_tu14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + svmopa_za32_bf16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za32_f16u10__SVBool_tu10__SVBool_tu13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { + svmopa_za32_f16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// 
CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmopa_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + svmopa_za32_s8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmopa_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + svmopa_za32_u8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 3, 
[[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + svmopa_za64_s16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + svmopa_za64_u16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmops_za32_bf16u10__SVBool_tu10__SVBool_tu14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + svmops_za32_bf16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za32_f16u10__SVBool_tu10__SVBool_tu13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { + svmops_za32_f16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmops_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) 
+// CPP-CHECK-NEXT: ret void +// +void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + svmops_za32_s8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmops_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + svmops_za32_u8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + svmops_za64_s16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svmops_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + svmops_za64_u16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za32_f32u10__SVBool_tu10__SVBool_tu13__SVFloat32_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za32_f32(svbool_t pn, 
svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + svmopa_za32_f32(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmopa_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_f64u10__SVBool_tu10__SVBool_tu13__SVFloat64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + svmopa_za64_f64(7, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za32_f32u10__SVBool_tu10__SVBool_tu13__SVFloat32_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.mops.nxv4f32(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + svmops_za32_f32(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmops_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_f64u10__SVBool_tu10__SVBool_tu13__SVFloat64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i32 7, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + svmops_za64_f64(7, pn, pm, zn, zm); +} + +// == MIXED SIGN MOPA / MOPS == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svsumops_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svsumops_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { 
+ svsumops_za32_s8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svsumops_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svsumops_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + svsumops_za64_s16(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svusmops_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svusmops_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 3, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + svusmops_za32_u8(3, pn, pm, zn, zm); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svusmops_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svusmops_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 3, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + svusmops_za64_u16(3, pn, pm, zn, zm); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_reads.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_reads.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_reads.c @@ -0,0 +1,954 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == HORIZONTAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// 
+// CPP-CHECK-LABEL: @_Z24test_svread_hor_za8_s8_mu10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint8_t test_svread_hor_za8_s8_m(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za8_s8_m(zd, pg, 0, slice_base, 15); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za8_u8_mu11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint8_t test_svread_hor_za8_u8_m(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za8_u8_m(zd, pg, 0, slice_base, 15); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_s16_mu11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add 
i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint16_t test_svread_hor_za16_s16_m(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za16_s16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_u16_mu12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint16_t test_svread_hor_za16_u16_m(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za16_u16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_f16_mu13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat16_t test_svread_hor_za16_f16_m(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za16_f16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za16_bf16_mu14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svread_hor_za16_bf16_m(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za16_bf16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: 
@_Z26test_svread_hor_za32_s32_mu11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint32_t test_svread_hor_za32_s32_m(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za32_s32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za32_u32_mu12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint32_t test_svread_hor_za32_u32_m(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za32_u32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 
[[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za32_f32_mu13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat32_t test_svread_hor_za32_f32_m(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za32_f32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_s64_mu11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint64_t test_svread_hor_za64_s64_m(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za64_s64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_u64_mu12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint64_t test_svread_hor_za64_u64_m(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za64_u64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_f64_mu13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat64_t test_svread_hor_za64_f64_m(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za64_f64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 
[[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za128_s8_mu10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svread_hor_za128_s8_m(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_s8_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za128_u8_mu11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svread_hor_za128_u8_m(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_u8_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s16_mu11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// 
+svint16_t test_svread_hor_za128_s16_m(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_s16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u16_mu12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svread_hor_za128_u16_m(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_u16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f16_mu13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svread_hor_za128_f16_m(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_f16_m(zd, 
pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za128_bf16_mu14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svread_hor_za128_bf16_m(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_bf16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s32_mu11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svread_hor_za128_s32_m(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_s32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svread_hor_za128_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u32_mu12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svread_hor_za128_u32_m(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_u32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f32_mu13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svread_hor_za128_f32_m(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_f32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s64_mu11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svread_hor_za128_s64_m(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_s64_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u64_mu12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svread_hor_za128_u64_m(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_u64_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za128_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f64_mu13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svread_hor_za128_f64_m(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_hor_za128_f64_m(zd, pg, 15, slice_base, 0); +} + +// == VERTICAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za8_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za8_s8_mu10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint8_t test_svread_ver_za8_s8_m(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za8_s8_m(zd, pg, 0, slice_base, 15); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za8_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za8_u8_mu11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint8_t test_svread_ver_za8_u8_m(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za8_u8_m(zd, pg, 0, slice_base, 15); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_s16_mu11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint16_t test_svread_ver_za16_s16_m(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za16_s16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_u16_mu12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint16_t test_svread_ver_za16_u16_m(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za16_u16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_f16_mu13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat16_t test_svread_ver_za16_f16_m(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za16_f16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za16_bf16_mu14__SVBFloat16_tu10__SVBool_tj( 
+// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svbfloat16_t test_svread_ver_za16_bf16_m(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za16_bf16_m(zd, pg, 1, slice_base, 7); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_s32_mu11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint32_t test_svread_ver_za32_s32_m(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za32_s32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// 
CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_u32_mu12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint32_t test_svread_ver_za32_u32_m(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za32_u32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_f32_mu13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat32_t test_svread_ver_za32_f32_m(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za32_f32_m(zd, pg, 3, slice_base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], 
[[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_s64_mu11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svint64_t test_svread_ver_za64_s64_m(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za64_s64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_u64_mu12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svuint64_t test_svread_ver_za64_u64_m(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za64_u64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = 
tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CHECK-NEXT: ret [[TMP2]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_f64_mu13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TMP1]]) +// CPP-CHECK-NEXT: ret [[TMP2]] +// +svfloat64_t test_svread_ver_za64_f64_m(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za64_f64_m(zd, pg, 7, slice_base, 1); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za128_s8_mu10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svread_ver_za128_s8_m(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_s8_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za128_u8_mu11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// 
CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svread_ver_za128_u8_m(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_u8_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s16_mu11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t test_svread_ver_za128_s16_m(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_s16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u16_mu12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svread_ver_za128_u16_m(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return 
svread_ver_za128_u16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f16_mu13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat16_t test_svread_ver_za128_f16_m(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_f16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za128_bf16_mu14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbfloat16_t test_svread_ver_za128_bf16_m(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_bf16_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// 
CHECK-LABEL: @test_svread_ver_za128_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s32_mu11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svread_ver_za128_s32_m(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_s32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u32_mu12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svread_ver_za128_u32_m(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_u32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f32_mu13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat32_t test_svread_ver_za128_f32_m(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_f32_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s64_mu11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svread_ver_za128_s64_m(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_s64_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u64_mu12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svread_ver_za128_u64_m(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_u64_m(zd, pg, 15, slice_base, 0); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za128_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f64_mu13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svfloat64_t test_svread_ver_za128_f64_m(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return svread_ver_za128_f64_m(zd, pg, 15, slice_base, 0); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_state_funs.c @@ -0,0 +1,79 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: 
%clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_in_streaming_mode_from_normal( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR4:[0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +// CHECK-NEXT: ret i1 [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z34test_in_streaming_mode_from_normalv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR4:[0-9]+]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +// CPP-CHECK-NEXT: ret i1 [[TMP3]] +// +bool test_in_streaming_mode_from_normal(void) { + return __arm_in_streaming_mode(); +} + +// +// +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_in_streaming_mode_from_streaming( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR4]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +// CHECK-NEXT: ret i1 [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z37test_in_streaming_mode_from_streamingv( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_sme_preservemost_from_x2 { i64, i64 } @__arm_sme_state() #[[ATTR4]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i64, i64 } 
[[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = and i64 [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = icmp ne i64 [[TMP2]], 0 +// CPP-CHECK-NEXT: ret i1 [[TMP3]] +// +bool test_in_streaming_mode_from_streaming(void) { + return __arm_in_streaming_mode(); +} + +// CHECK-LABEL: @test_disable_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @__arm_disable_za() #[[ATTR5:[0-9]+]] +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_disable_zav( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @__arm_disable_za() #[[ATTR5:[0-9]+]] +// CPP-CHECK-NEXT: ret void +// +void test_disable_za(void) { + __arm_disable_za(); +} + +// +// +__attribute__((arm_streaming_compatible, arm_shared_za)) +// CHECK-LABEL: @test_svundef_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z15test_svundef_zav( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: ret void +// +void test_svundef_za(void) { + svundef_za(); +} + diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_stores.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_stores.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_stores.c @@ -0,0 +1,499 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == HORIZONTAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret void 
+// +// CPP-CHECK-LABEL: @_Z18test_svst1_hor_za8ju10__SVBool_tPa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_za8(uint32_t slice_base, svbool_t pg, int8_t *base) { + svst1_hor_za8(0, slice_base, 15, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za16ju10__SVBool_tPs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_za16(uint32_t slice_base, svbool_t pg, int16_t *base) { + svst1_hor_za16(1, slice_base, 7, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za32ju10__SVBool_tPi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_za32(uint32_t slice_base, svbool_t pg, int32_t *base) { + svst1_hor_za32(3, slice_base, 3, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_hor_za64ju10__SVBool_tPl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_za64(uint32_t slice_base, svbool_t pg, int64_t *base) { + svst1_hor_za64(7, slice_base, 1, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst1_hor_za128ju10__SVBool_tPn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void 
+// +void test_svst1_hor_za128(uint32_t slice_base, svbool_t pg, __int128_t *base) { + svst1_hor_za128(15, slice_base, 0, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svst1_hor_vnum_za8ju10__SVBool_tPa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.horiz( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_vnum_za8(uint32_t slice_base, svbool_t pg, int8_t *base) { + svst1_hor_vnum_za8(0, slice_base, 15, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z24test_svst1_hor_vnum_za16ju10__SVBool_tPs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.horiz( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_vnum_za16(uint32_t slice_base, svbool_t pg, int16_t *base) { + svst1_hor_vnum_za16(1, slice_base, 7, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_hor_vnum_za32ju10__SVBool_tPi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.horiz( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svst1_hor_vnum_za32(uint32_t slice_base, svbool_t pg, int32_t *base) { + svst1_hor_vnum_za32(3, slice_base, 3, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_hor_vnum_za64ju10__SVBool_tPl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.horiz( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_vnum_za64(uint32_t slice_base, svbool_t pg, int64_t *base) { + svst1_hor_vnum_za64(7, slice_base, 1, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_hor_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( 
[[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svst1_hor_vnum_za128ju10__SVBool_tPn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.horiz( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_hor_vnum_za128(uint32_t slice_base, svbool_t pg, __int128_t *base) { + svst1_hor_vnum_za128(15, slice_base, 0, pg, base, 3); +} + +// == VERTICAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svst1_ver_za8ju10__SVBool_tPa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[BASE:%.*]], i32 0, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_za8(uint32_t slice_base, svbool_t pg, int8_t *base) { + svst1_ver_za8(0, slice_base, 15, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// 
CPP-CHECK-LABEL: @_Z19test_svst1_ver_za16ju10__SVBool_tPs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP1]], ptr [[BASE:%.*]], i32 1, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_za16(uint32_t slice_base, svbool_t pg, int16_t *base) { + svst1_ver_za16(1, slice_base, 7, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_ver_za32ju10__SVBool_tPi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP1]], ptr [[BASE:%.*]], i32 3, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_za32(uint32_t slice_base, svbool_t pg, int32_t *base) { + svst1_ver_za32(3, slice_base, 3, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svst1_ver_za64ju10__SVBool_tPl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP1]], ptr [[BASE:%.*]], i32 7, i32 [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_za64(uint32_t slice_base, svbool_t pg, int64_t *base) { + svst1_ver_za64(7, slice_base, 1, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svst1_ver_za128ju10__SVBool_tPn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP0]], ptr [[BASE:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_za128(uint32_t slice_base, svbool_t pg, __int128_t *base) { + svst1_ver_za128(15, slice_base, 0, pg, base); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_vnum_za8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svst1_ver_vnum_za8ju10__SVBool_tPa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = 
mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1b.vert( [[PG:%.*]], ptr [[TMP2]], i32 0, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_vnum_za8(uint32_t slice_base, svbool_t pg, int8_t *base) { + svst1_ver_vnum_za8(0, slice_base, 15, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_vnum_za16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za16ju10__SVBool_tPs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1h.vert( [[TMP4]], ptr [[TMP2]], i32 1, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_vnum_za16(uint32_t slice_base, svbool_t pg, int16_t *base) { + svst1_ver_vnum_za16(1, slice_base, 7, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_vnum_za32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 
@llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za32ju10__SVBool_tPi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1w.vert( [[TMP4]], ptr [[TMP2]], i32 3, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_vnum_za32(uint32_t slice_base, svbool_t pg, int32_t *base) { + svst1_ver_vnum_za32(3, slice_base, 3, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_vnum_za64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svst1_ver_vnum_za64ju10__SVBool_tPl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() 
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1d.vert( [[TMP4]], ptr [[TMP2]], i32 7, i32 [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_vnum_za64(uint32_t slice_base, svbool_t pg, int64_t *base) { + svst1_ver_vnum_za64(7, slice_base, 1, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svst1_ver_vnum_za128( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svst1_ver_vnum_za128ju10__SVBool_tPn( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv1i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.st1q.vert( [[TMP3]], ptr [[TMP2]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_ver_vnum_za128(uint32_t slice_base, svbool_t pg, __int128_t *base) { + svst1_ver_vnum_za128(15, slice_base, 0, pg, base, 3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svstr_vnum_za( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 15 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TMP3]], ptr [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svstr_vnum_zaju10__SVBool_tPh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.aarch64.sme.cntsb() +// CPP-CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 15 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[BASE:%.*]], i64 [[TMP1]] +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.str(i32 [[TMP3]], ptr [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstr_vnum_za(uint32_t slice_base, svbool_t pg, uint8_t *base) { + svstr_vnum_za(slice_base, 15, pg, base); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_writes.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_writes.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_writes.c @@ -0,0 +1,956 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// == HORIZONTAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], 
[[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za8_s8_mju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_s8_m(uint32_t slice_base, svbool_t pg, svint8_t zn) { + svwrite_hor_za8_s8_m(0, slice_base, 15, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za8_u8_mju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_u8_m(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + svwrite_hor_za8_u8_m(0, slice_base, 15, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_s16_mju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_s16_m(uint32_t slice_base, svbool_t pg, svint16_t zn) { + svwrite_hor_za16_s16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_u16_mju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_u16_m(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + svwrite_hor_za16_u16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_f16_mju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_f16_m(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + svwrite_hor_za16_f16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za16_bf16_mju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_bf16_m(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + svwrite_hor_za16_bf16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_s32_mju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_s32_m(uint32_t slice_base, svbool_t pg, svint32_t zn) { + svwrite_hor_za32_s32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_u32_mju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_u32_m(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + svwrite_hor_za32_u32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_f32_mju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_f32_m(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + svwrite_hor_za32_f32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_s64_mju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_s64_m(uint32_t slice_base, svbool_t pg, svint64_t zn) { + svwrite_hor_za64_s64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_u64_mju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_u64_m(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + svwrite_hor_za64_u64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_f64_mju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_f64_m(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + svwrite_hor_za64_f64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za128_s8_mju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_s8_m(uint32_t slice_base, svbool_t pg, svint8_t zn) { + svwrite_hor_za128_s8_m(15, slice_base, 0, pg, zn); +} + +// +// 
+__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za128_u8_mju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_u8_m(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + svwrite_hor_za128_u8_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s16_mju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_s16_m(uint32_t slice_base, svbool_t pg, svint16_t zn) { + svwrite_hor_za128_s16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z28test_svwrite_hor_za128_u16_mju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_u16_m(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + svwrite_hor_za128_u16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f16_mju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_f16_m(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + svwrite_hor_za128_f16_m(15, slice_base, 0, pg, zn); +} + +// +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za128_bf16_mju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( 
[[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_bf16_m(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + svwrite_hor_za128_bf16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s32_mju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_s32_m(uint32_t slice_base, svbool_t pg, svint32_t zn) { + svwrite_hor_za128_s32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_u32_mju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svwrite_hor_za128_u32_m(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + svwrite_hor_za128_u32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f32_mju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_f32_m(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + svwrite_hor_za128_f32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s64_mju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_s64_m(uint32_t slice_base, svbool_t pg, svint64_t zn) { + svwrite_hor_za128_s64_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// 
CHECK-LABEL: @test_svwrite_hor_za128_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_u64_mju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_u64_m(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + svwrite_hor_za128_u64_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za128_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f64_mju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za128_f64_m(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + svwrite_hor_za128_f64_m(15, slice_base, 0, pg, zn); +} + +// == VERTICAL == + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za8_s8_mju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_s8_m(uint32_t slice_base, svbool_t pg, svint8_t zn) { + // + svwrite_ver_za8_s8_m(0, slice_base, 15, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za8_u8_mju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TMP0]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_u8_m(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + svwrite_ver_za8_u8_m(0, slice_base, 15, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_s16_mju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( 
[[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_s16_m(uint32_t slice_base, svbool_t pg, svint16_t zn) { + svwrite_ver_za16_s16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_u16_mju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_u16_m(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + svwrite_ver_za16_u16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_f16_mju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) 
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_f16_m(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + svwrite_ver_za16_f16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za16_bf16_mju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_bf16_m(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + svwrite_ver_za16_bf16_m(1, slice_base, 7, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_s32_mju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) 
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_s32_m(uint32_t slice_base, svbool_t pg, svint32_t zn) { + svwrite_ver_za32_s32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_u32_mju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_u32_m(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + svwrite_ver_za32_u32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_f32_mju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_f32_m(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + svwrite_ver_za32_f32_m(3, slice_base, 3, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_s64_mju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_s64_m(uint32_t slice_base, svbool_t pg, svint64_t zn) { + svwrite_ver_za64_s64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_u64_mju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_u64_m(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + svwrite_ver_za64_u64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_f64_mju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TMP1]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_f64_m(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + svwrite_ver_za64_f64_m(7, slice_base, 1, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za128_s8_mju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_s8_m(uint32_t slice_base, svbool_t pg, svint8_t zn) { + 
svwrite_ver_za128_s8_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za128_u8_mju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_u8_m(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + svwrite_ver_za128_u8_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s16_mju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_s16_m(uint32_t slice_base, svbool_t pg, svint16_t zn) { + svwrite_ver_za128_s16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// 
CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u16_mju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_u16_m(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + svwrite_ver_za128_u16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_f16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f16_mju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_f16_m(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + svwrite_ver_za128_f16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_bf16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za128_bf16_mju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_bf16_m(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + svwrite_ver_za128_bf16_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s32_mju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_s32_m(uint32_t slice_base, svbool_t pg, svint32_t zn) { + svwrite_ver_za128_s32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u32_mju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// 
CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_u32_m(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + svwrite_ver_za128_u32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_f32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f32_mju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_f32_m(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + svwrite_ver_za128_f32_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s64_mju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_s64_m(uint32_t slice_base, svbool_t pg, svint64_t zn) { + svwrite_ver_za128_s64_m(15, slice_base, 0, pg, zn); +} + +// +// 
+__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u64_mju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_u64_m(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + svwrite_ver_za128_u64_m(15, slice_base, 0, pg, zn); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za128_f64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f64_mju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za128_f64_m(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + svwrite_ver_za128_f64_m(15, slice_base, 0, pg, zn); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c new file mode 100644 --- /dev/null +++ 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c @@ -0,0 +1,40 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svzero_mask_za_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svzero_mask_za_3v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_mask_za_3(void) { + svzero_mask_za(3); +} + +// +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svzero_za( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z14test_svzero_zav( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CPP-CHECK-NEXT: ret void +// +void test_svzero_za(void) { + svzero_za(); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_add.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_add.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_add.c @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + // expected-error@+1 {{argument value 4 is 
outside the valid range [0, 3]}} + svaddha_za32_s32(4, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svaddha_za32_u32(4, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svaddha_za64_s64(8, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svaddha_za64_u64(8, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svaddva_za32_s32(4, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svaddva_za32_u32(4, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svaddva_za64_s64(8, pn, pm, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svaddva_za64_u64(8, pn, pm, zn); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_loads.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_loads.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_loads.c @@ -0,0 +1,69 @@ 
+// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_hor_za8_bad_tile(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svld1_hor_za8(1, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_hor_za16_bad_tile(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svld1_hor_za16(2, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_hor_za32_bad_tile(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svld1_hor_za32(4, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_ver_za64_bad_tile(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svld1_ver_za64(8, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_ver_za128_bad_tile(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svld1_ver_za128(16, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_hor_za8_slice_offset(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svld1_hor_za8(0, slice_base, 16, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_ver_za16_slice_offset(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svld1_ver_za16(0, slice_base, 8, 
pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_hor_za32_slice_offset(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svld1_hor_za32(0, slice_base, 4, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_ver_za64_slice_offset(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svld1_ver_za64(0, slice_base, 2, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svld1_ver_za128_slice_offset(uint32_t slice_base, svbool_t pg, const int8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 0]}} + svld1_ver_za128(0, slice_base, 16, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svldr_vnum_za(uint32_t slice_base, svbool_t pg, const uint8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svldr_vnum_za(slice_base, 16, pg, base); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_mop.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_mop.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_mop.c @@ -0,0 +1,128 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -D__ARM_FEATURE_SME_I64I64=1 -D__ARM_FEATURE_SME_F64F64=1 \ +// RUN: -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +#include + +// == MOPS / MOPA == + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_bf16(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { + // 
expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_f16(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_s8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_u8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmopa_za64_s16(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmopa_za64_u16(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmopa_za32_f32(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmopa_za64_f64(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_bf16(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t 
zn, svfloat16_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_f16(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_s8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_u8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmops_za64_s16(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmops_za64_u16(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svmops_za32_f32(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svmops_za64_f64(8, pn, pm, zn, zm); +} + +// == MIXED SIGN MOPA / MOPS == + +__attribute__((arm_streaming, arm_shared_za)) +void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svsumops_za32_s8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void 
test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svsumops_za64_s16(8, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svusmops_za32_u8(4, pn, pm, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svusmops_za64_u16(8, pn, pm, zn, zm); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_reads.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_reads.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_reads.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_hor_za8_bad_tile(svint8_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svread_hor_za8_s8_m(zd, pg, 1, slice_base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_ver_za16_bad_tile(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svread_ver_za16_u16_m(zd, pg, 2, slice_base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_ver_za32_bad_tile(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svread_ver_za32_f32_m(zd, pg, 4, slice_base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +void 
test_svread_hor_za64_bad_tile(svint64_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svread_hor_za64_s64_m(zd, pg, 8, slice_base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_hor_za128_bad_tile(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svread_hor_za128_bf16_m(zd, pg, 16, slice_base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_hor_za8_bad_slice(svint8_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svread_hor_za8_s8_m(zd, pg, 0, slice_base, 16); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_ver_za16_bad_slice(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svread_ver_za16_u16_m(zd, pg, 1, slice_base, 8); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_ver_za32_bad_slice(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svread_ver_za32_f32_m(zd, pg, 3, slice_base, 4); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_hor_za64_bad_slice(svint64_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svread_hor_za64_s64_m(zd, pg, 7, slice_base, 2); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svread_hor_za128_bad_slice(svint32_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svread_hor_za128_s32_m(zd, pg, 15, slice_base, 1); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_stores.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_stores.c new file mode 
100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_stores.c @@ -0,0 +1,69 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_hor_za8_bad_tile(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svst1_hor_za8(1, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_hor_za16_bad_tile(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svst1_hor_za16(2, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_hor_za32_bad_tile(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svst1_hor_za32(4, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_ver_za64_bad_tile(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svst1_ver_za64(8, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_ver_za128_bad_tile(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svst1_ver_za128(16, slice_base, 0, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_hor_za8_slice_offset(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svst1_hor_za8(0, slice_base, 16, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_ver_za16_slice_offset(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 8 
is outside the valid range [0, 7]}} + svst1_ver_za16(0, slice_base, 8, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_hor_za32_slice_offset(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svst1_hor_za32(0, slice_base, 4, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_ver_za64_slice_offset(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svst1_ver_za64(0, slice_base, 2, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svst1_ver_za128_slice_offset(uint32_t slice_base, svbool_t pg, int8_t *base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svst1_ver_za128(0, slice_base, 1, pg, base); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svstr_vnum_za(uint32_t slice_base, svbool_t pg, uint8_t *base) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svstr_vnum_za(slice_base, 16, pg, base); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_writes.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_writes.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_writes.c @@ -0,0 +1,63 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za8_bad_tile(uint32_t slice_base, svbool_t pg, svint8_t zn) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svwrite_hor_za8_s8_m(1, slice_base, 0, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_ver_za16_bad_tile(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + // expected-error@+1 {{argument value 2 is outside 
the valid range [0, 1]}} + svwrite_ver_za16_u16_m(2, slice_base, 0, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_ver_za32_bad_tile(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svwrite_ver_za32_f32_m(4, slice_base, 0, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za64_bad_tile(uint32_t slice_base, svbool_t pg, svint64_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svwrite_hor_za64_s64_m(8, slice_base, 0, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za128_bad_tile(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svwrite_hor_za128_bf16_m(16, slice_base, 0, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za8_bad_slice(uint32_t slice_base, svbool_t pg, svint8_t zn) { + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + svwrite_hor_za8_s8_m(0, slice_base, 16, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_ver_za16_bad_slice(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + svwrite_ver_za16_u16_m(1, slice_base, 8, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_ver_za32_bad_slice(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + svwrite_ver_za32_f32_m(3, slice_base, 4, pg, zn); +} + +__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za64_bad_slice(uint32_t slice_base, svbool_t pg, svint64_t zn) { + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + svwrite_hor_za64_s64_m(7, slice_base, 2, pg, zn); +} + 
+__attribute__((arm_streaming, arm_shared_za)) +void test_svwrite_hor_za128_bad_slice(svint32_t zd, svbool_t pg, uint32_t slice_base) { + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + svwrite_hor_za128_s32_m(15, slice_base, 1, pg, zd); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_zero.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/negative/acle_sme_zero.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme -fsyntax-only -verify %s + +#include + +__attribute__((arm_streaming, arm_shared_za)) +void test_svzero_mask_za(void) { + // expected-error@+1 {{argument value 256 is outside the valid range [0, 255]}} + svzero_mask_za(256); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mla.c @@ -0,0 +1,348 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Multi, multi +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_multi2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv4f32(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmla_multi2_f32j13svfloat32x2_t13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv4f32(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_multi2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) { + SVE_ACLE_FUNC(svmla,,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_multi4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv4f32(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmla_multi4_f32j13svfloat32x4_t13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv4f32(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_multi4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) { + SVE_ACLE_FUNC(svmla,,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_single2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmla_single2_f32j13svfloat32x2_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_single4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmla_single4_f32j13svfloat32x4_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmla,_single,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, indexed +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_lane2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f32j13svfloat32x2_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmla_lane,,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm, 3); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_lane4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f32j13svfloat32x4_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmla_lane,,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm, 3); +} + +// +// Multi, multi +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_multi2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call
@llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmla_multi2_f64j13svfloat64x2_t13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_multi2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) { + SVE_ACLE_FUNC(svmla,,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_multi4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv2f64(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmla_multi4_f64j13svfloat64x4_t13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv2f64(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_multi4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) { + SVE_ACLE_FUNC(svmla,,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_single2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmla_single2_f64j13svfloat64x2_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_single4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[TMP5]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmla_single4_f64j13svfloat64x4_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmla,_single,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, indexed +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_lane2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 1) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmla_lane2_f64j13svfloat64x2_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 1) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmla_lane,,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm, 1); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmla_lane4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 1) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmla_lane4_f64j13svfloat64x4_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 1) +// CPP-CHECK-NEXT: ret void +// +void test_svmla_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmla_lane,,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm, 1); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlal.c @@ -0,0 +1,856 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror 
-Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Multi, multi +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_multi2_f16j13svfloat16x2_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_f16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svmlal_multi2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_multi2_bf16j14svbfloat16x2_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_bf16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// 
CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_multi2_u16j12svuint16x2_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_u16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z22test_svmlal_multi2_s16j11svint16x2_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_s16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8f16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// 
CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_multi4_f16j13svfloat16x4_t13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8f16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_f16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: 
[[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8bf16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_multi4_bf16j14svbfloat16x4_t14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8bf16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_bf16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi4_u16( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_multi4_u16j12svuint16x4_t12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add 
i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_u16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_multi4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_multi4_s16j11svint16x4_t11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_multi4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) +{ + SVE_ACLE_FUNC(svmlal_za32,_s16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single1_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single1_f16ju13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single1_f16(uint32_t slice_base, 
svfloat16_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_f16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svmlal_single1_bf16ju14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_bf16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single1_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single1_u16ju12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_u16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single1_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single1_s16ju11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_s16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single2_f16j13svfloat16x2_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_f16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svmlal_single2_bf16j14svbfloat16x2_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], 
i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_bf16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single2_u16j12svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) +{ + 
SVE_ACLE_FUNC(svmlal,_single,_za32,_u16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single2_s16j11svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_s16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single4_f16j13svfloat16x4_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_f16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 
[[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svmlal_single4_bf16j14svbfloat16x4_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_bf16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], 
[[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single4_u16j12svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_u16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_single4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlal_single4_s16j11svint16x4_tu11__SVInt16_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_single,_za32,_s16,_vg2x4)(slice_base, 6, zn, zm); +} + +// +// Multi, indexed +// + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane1_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane1_f16ju13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret 
void +// +void test_svmlal_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_f16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_lane1_bf16ju14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_bf16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane1_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane1_u16ju12__SVUint16_tu12__SVUint16_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_u16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane1_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane1_s16ju11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_s16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( 
[[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane2_f16j13svfloat16x2_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_f16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_lane2_bf16j14svbfloat16x2_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_bf16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane2_u16j12svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane2_u16(uint32_t slice_base, 
svuint16x2_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_u16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane2_s16j11svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_s16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane4_f16j13svfloat16x4_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_f16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( 
[[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlal_lane4_bf16j14svbfloat16x4_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_bf16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlal_lane4_u16j12svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_u16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlal_lane4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// 
CPP-CHECK-LABEL: @_Z21test_svmlal_lane4_s16j11svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlal_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlal,_lane,_za32,_s16,_vg2x4)(slice_base, 6, zn, zm, 7); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mls.c @@ -0,0 +1,348 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 
-D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_multi2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv4f32(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmls_multi2_f32j13svfloat32x2_t13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv4f32(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_multi2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32x2_t zm) { + SVE_ACLE_FUNC(svmls,,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_multi4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv4f32(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmls_multi4_f32j13svfloat32x4_t13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZM]], i64 12) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv4f32(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_multi4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32x4_t zm) { + SVE_ACLE_FUNC(svmls,,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_single2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmls_single2_f32j13svfloat32x2_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_single2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_single4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmls_single4_f32j13svfloat32x4_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 
12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_single4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmls,_single,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, indexed +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_lane2_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f32j13svfloat32x2_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_lane2_f32(uint32_t slice_base, svfloat32x2_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmls_lane,,_za32,_f32,_vg1x2)(slice_base, 7, zn, zm, 3); +} + +__attribute__((arm_streaming, arm_shared_za)) 
+// CHECK-LABEL: @test_svmls_lane4_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 3) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f32j13svfloat32x4_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[ZN]], i64 12) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv4f32( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 3) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_lane4_f32(uint32_t slice_base, svfloat32x4_t zn, svfloat32_t zm) { + SVE_ACLE_FUNC(svmls_lane,,_za32,_f32,_vg1x4)(slice_base, 7, zn, zm, 3); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_multi2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmls_multi2_f64j13svfloat64x2_t13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_multi2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64x2_t zm) { + SVE_ACLE_FUNC(svmls,,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_multi4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv2f64(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmls_multi4_f64j13svfloat64x4_t13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZM]], i64 6) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv2f64(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_multi4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64x4_t zm) { + SVE_ACLE_FUNC(svmls,,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) 
+// CHECK-LABEL: @test_svmls_single2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmls_single2_f64j13svfloat64x2_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_single2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_single4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 
[[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmls_single4_f64j13svfloat64x4_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_single4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmls,_single,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm); +} + +// +// Multi, indexed +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_lane2_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 1) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmls_lane2_f64j13svfloat64x2_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 1) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_lane2_f64(uint32_t slice_base, svfloat64x2_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmls_lane,,_za64,_f64,_vg1x2)(slice_base, 7, zn, zm, 1); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmls_lane4_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 1) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmls_lane4_f64j13svfloat64x4_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[ZN]], i64 6) +// CPP-CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv2f64( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 1) +// CPP-CHECK-NEXT: ret void +// +void test_svmls_lane4_f64(uint32_t slice_base, svfloat64x4_t zn, svfloat64_t zm) { + SVE_ACLE_FUNC(svmls_lane,,_za64,_f64,_vg1x4)(slice_base, 7, zn, zm, 1); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mlsl.c @@ -0,0 +1,856 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 
-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED,A5) A1##A3##A5 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4,A5) A1##A2##A3##A4##A5 +#endif + +// +// Multi, multi +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi2_f16j13svfloat16x2_t13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16x2_t zm) +{ + 
SVE_ACLE_FUNC(svmlsl_za32,_f16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_multi2_bf16j14svbfloat16x2_t14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16x2_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_bf16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi2_u16j12svuint16x2_t12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16x2_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_u16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.smlsl.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi2_s16j11svint16x2_t11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.multi.vg2x2.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi2_s16(uint32_t slice_base, svint16x2_t zn, svint16x2_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_s16,_vg2x2,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8f16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi4_f16j13svfloat16x4_t13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8f16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16x4_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_f16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8bf16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_multi4_bf16j14svbfloat16x4_t14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8bf16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16x4_t 
zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_bf16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi4_u16j12svuint16x4_t12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16x4_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_u16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_multi4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_multi4_s16j11svint16x4_t11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZM]], i64 24) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.multi.vg2x4.nxv8i16(i32 [[TMP8]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], [[TMP5]], [[TMP6]], [[TMP7]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_multi4_s16(uint32_t slice_base, svint16x4_t zn, svint16x4_t zm) +{ + SVE_ACLE_FUNC(svmlsl_za32,_s16,_vg2x4,,)(slice_base, 6, zn, zm); +} + +// +// Multi, single +// +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single1_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single1_f16ju13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 
14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_f16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svmlsl_single1_bf16ju14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_bf16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single1_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 [[TMP2]], 
[[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single1_u16ju12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_u16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single1_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single1_s16ju11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_s16,_vg2x1)(slice_base, 14, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svmlsl_single2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single2_f16j13svfloat16x2_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_f16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z24test_svmlsl_single2_bf16j14svbfloat16x2_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_bf16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single2_u16j12svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], 
[[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_u16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single2_s16j11svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_s16,_vg2x2)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single4_f16j13svfloat16x4_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_f16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( 
[[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svmlsl_single4_bf16j14svbfloat16x4_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_bf16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// 
CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single4_u16j12svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_u16,_vg2x4)(slice_base, 6, zn, zm); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_single4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[TMP5]], 
[[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svmlsl_single4_s16j11svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]]) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_single4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_single,_za32,_s16,_vg2x4)(slice_base, 6, zn, zm); +} + +// +// Multi, indexed +// + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane1_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane1_f16ju13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 
[[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane1_f16(uint32_t slice_base, svfloat16_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_f16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane1_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_lane1_bf16ju14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane1_bf16(uint32_t slice_base, svbfloat16_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_bf16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane1_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane1_u16ju12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane1_u16(uint32_t slice_base, svuint16_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_u16,_vg2x1)(slice_base, 14, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane1_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane1_s16ju11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[SLICE_BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane1_s16(uint32_t slice_base, svint16_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_s16,_vg2x1)(slice_base, 14, zn, zm, 
7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane2_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane2_f16j13svfloat16x2_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane2_f16(uint32_t slice_base, svfloat16x2_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_f16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane2_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[TMP3]], 
[[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_lane2_bf16j14svbfloat16x2_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane2_bf16(uint32_t slice_base, svbfloat16x2_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_bf16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane2_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane2_u16j12svuint16x2_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: 
tail call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane2_u16(uint32_t slice_base, svuint16x2_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_u16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane2_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane2_s16j11svint16x2_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 [[TMP3]], [[TMP0]], [[TMP1]], [[TMP2]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane2_s16(uint32_t slice_base, svint16x2_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_s16,_vg2x2)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane4_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane4_f16j13svfloat16x4_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv8f16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane4_f16(uint32_t slice_base, svfloat16x4_t zn, svfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_f16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane4_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( 
[[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svmlsl_lane4_bf16j14svbfloat16x4_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv8bf16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane4_bf16(uint32_t slice_base, svbfloat16x4_t zn, svbfloat16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_bf16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane4_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// 
CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane4_u16j12svuint16x4_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane4_u16(uint32_t slice_base, svuint16x4_t zn, svuint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_u16,_vg2x4)(slice_base, 6, zn, zm, 7); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svmlsl_lane4_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CHECK-NEXT: [[TMP5:%.*]] = add i32 
[[SLICE_BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmlsl_lane4_s16j11svint16x4_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[ZN]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv8i16( [[ZM:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SLICE_BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 [[TMP5]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], i32 7) +// CPP-CHECK-NEXT: ret void +// +void test_svmlsl_lane4_s16(uint32_t slice_base, svint16x4_t zn, svint16_t zm) +{ + SVE_ACLE_FUNC(svmlsl,_lane,_za32,_s16,_vg2x4)(slice_base, 6, zn, zm, 7); +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_read.c @@ -0,0 +1,1500 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8x2_t test_svread_ver_za8_u8_vg2(uint32_t base) { + return svread_ver_za8_u8_vg2(base, 14); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8x2_t test_svread_ver_za8_s8_vg2(uint32_t base) { + return svread_ver_za8_s8_vg2(base, 14); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8x2_t test_svread_hor_za8_u8_vg2(uint32_t base) { + return svread_hor_za8_u8_vg2(base, 14); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8x2_t test_svread_hor_za8_s8_vg2(uint32_t base) { + return svread_hor_za8_s8_vg2(base, 14); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_u8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint8x4_t test_svread_hor_za8_u8_vg4(uint32_t base) { + return svread_hor_za8_u8_vg4(base, 12); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 [[TMP0]]) 
+// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za8_s8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint8x4_t test_svread_hor_za8_s8_vg4(uint32_t base) { + return svread_hor_za8_s8_vg4(base, 12); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// 
CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_u8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint8x4_t test_svread_ver_za8_u8_vg4(uint32_t base) { + return svread_ver_za8_u8_vg4(base, 12); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svread_ver_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za8_s8_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint8x4_t test_svread_ver_za8_s8_vg4(uint32_t base) { + return 
svread_ver_za8_s8_vg4(base, 12); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16x2_t test_svread_hor_za16_u16_vg2(uint32_t base) { + return svread_hor_za16_u16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svbfloat16x2_t test_svread_hor_za16_bf16_vg2(uint32_t base) { + return svread_hor_za16_bf16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat16x2_t test_svread_hor_za16_f16_vg2(uint32_t base) { + return svread_hor_za16_f16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16x2_t test_svread_hor_za16_s16_vg2(uint32_t base) { + return svread_hor_za16_s16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16x2_t test_svread_ver_za16_u16_vg2(uint32_t base) { + return svread_ver_za16_u16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = 
tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svbfloat16x2_t test_svread_ver_za16_bf16_vg2(uint32_t base) { + return svread_ver_za16_bf16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat16x2_t test_svread_ver_za16_f16_vg2(uint32_t base) { + return svread_ver_za16_f16_vg2(base, 6); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , 
} @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16x2_t test_svread_ver_za16_s16_vg2(uint32_t base) { + return svread_ver_za16_s16_vg2(base, 6); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue 
{ , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_u16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint16x4_t test_svread_hor_za16_u16_vg4(uint32_t base) { + return svread_hor_za16_u16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = 
tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_hor_za16_bf16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svbfloat16x4_t test_svread_hor_za16_bf16_vg4(uint32_t base) { + return svread_hor_za16_bf16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call 
@llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_f16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat16x4_t test_svread_hor_za16_f16_vg4(uint32_t base) { + return svread_hor_za16_f16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], 
i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za16_s16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint16x4_t test_svread_hor_za16_s16_vg4(uint32_t base) { + return svread_hor_za16_s16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , 
} [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_u16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint16x4_t test_svread_ver_za16_u16_vg4(uint32_t base) { + return svread_ver_za16_u16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { 
, , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z29test_svread_ver_za16_bf16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32bf16.nxv8bf16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svbfloat16x4_t test_svread_ver_za16_bf16_vg4(uint32_t base) { + return svread_ver_za16_bf16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svread_ver_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_f16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat16x4_t test_svread_ver_za16_f16_vg4(uint32_t 
base) { + return svread_ver_za16_f16_vg4(base, 4); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za16_s16_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( 
[[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint16x4_t test_svread_ver_za16_s16_vg4(uint32_t base) { + return svread_ver_za16_s16_vg4(base, 4); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint32x2_t test_svread_hor_za32_u32_vg2(uint32_t base) { + return svread_hor_za32_u32_vg2(base, 2); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// 
CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat32x2_t test_svread_hor_za32_f32_vg2(uint32_t base) { + return svread_hor_za32_f32_vg2(base, 2); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint32x2_t test_svread_hor_za32_s32_vg2(uint32_t base) { + return svread_hor_za32_s32_vg2(base, 2); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint32x2_t test_svread_ver_za32_u32_vg2(uint32_t base) { + return svread_ver_za32_u32_vg2(base, 2); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = 
extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat32x2_t test_svread_ver_za32_f32_vg2(uint32_t base) { + return svread_ver_za32_f32_vg2(base, 2); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = 
extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint32x2_t test_svread_ver_za32_s32_vg2(uint32_t base) { + return svread_ver_za32_s32_vg2(base, 2); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_u32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svread_hor_za32_u32_vg4(uint32_t base) { + return svread_hor_za32_u32_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_f32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: 
[[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svread_hor_za32_f32_vg4(uint32_t base) { + return svread_hor_za32_f32_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za32_s32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svread_hor_za32_s32_vg4(uint32_t base) { + return svread_hor_za32_s32_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_u32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) 
+// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svread_ver_za32_u32_vg4(uint32_t base) { + return svread_ver_za32_u32_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_f32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], 
i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svread_ver_za32_f32_vg4(uint32_t base) { + return svread_ver_za32_f32_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za32_s32_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], 
[[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svread_ver_za32_s32_vg4(uint32_t base) { + return svread_ver_za32_s32_vg4(base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svread_hor_za64_u64_vg2(uint32_t base) { + return svread_hor_za64_u64_vg2(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 
[[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svread_hor_za64_f64_vg2(uint32_t base) { + return svread_hor_za64_f64_vg2(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, 
[[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svread_hor_za64_s64_vg2(uint32_t base) { + return svread_hor_za64_s64_vg2(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svread_ver_za64_u64_vg2(uint32_t base) { + return svread_ver_za64_u64_vg2(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// 
CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svread_ver_za64_f64_vg2(uint32_t base) { + return svread_ver_za64_f64_vg2(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], 
i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svread_ver_za64_s64_vg2(uint32_t base) { + return svread_ver_za64_s64_vg2(base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_u64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 
6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svread_hor_za64_u64_vg4(uint32_t base) { + return svread_hor_za64_u64_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_f64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) 
+// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svread_hor_za64_f64_vg4(uint32_t base) { + return svread_hor_za64_f64_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_hor_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za64_s64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// 
CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svread_hor_za64_s64_vg4(uint32_t base) { + return svread_hor_za64_s64_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_u64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// 
CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svread_ver_za64_u64_vg4(uint32_t base) { + return svread_ver_za64_u64_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_f64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// 
CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svread_ver_za64_f64_vg4(uint32_t base) { + return svread_ver_za64_f64_vg4(base, 0); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_ver_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za64_s64_vg4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// 
CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svread_ver_za64_s64_vg4(uint32_t base) { + return svread_ver_za64_s64_vg4(base, 0); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_u64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint64x2_t test_svread_za64_u64_vg1x2(uint32_t base) { + return svread_za64_u64_vg1x2(base, 7); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_f64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 
+// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat64x2_t test_svread_za64_f64_vg1x2(uint32_t base) { + return svread_za64_f64_vg1x2(base, 7); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_s64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x2j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint64x2_t test_svread_za64_s64_vg1x2(uint32_t base) { + return svread_za64_s64_vg1x2(base, 7); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_u64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_u64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], 
[[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint64x4_t test_svread_za64_u64_vg1x4(uint32_t base) { + return svread_za64_u64_vg1x4(base, 7); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_f64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_f64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// 
CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat64x4_t test_svread_za64_f64_vg1x4(uint32_t base) { + return svread_za64_f64_vg1x4(base, 7); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svread_za64_s64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_za64_s64_vg1x4j( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], 
[[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint64x4_t test_svread_za64_s64_vg1x4(uint32_t base) { + return svread_za64_s64_vg1x4(base, 7); +} + diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_write.c @@ -0,0 +1,1176 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -mem2reg -instcombine -tailcallelim | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-mem2reg -instcombine -tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -D__ARM_FEATURE_SME_F64F64 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sme2 -target-feature +sme-f64f64 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg2j11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_u8_vg2(uint32_t base, svuint8x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg2,)(base, 14, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], 
i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg2j10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_s8_vg2(uint32_t base, svint8x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg2,)(base, 14, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_u8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg2j11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_u8_vg2(uint32_t base, svuint8x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg2,)(base, 14, val); +} +__attribute__((arm_streaming, 
arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_s8_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg2j10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 14 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_s8_vg2(uint32_t base, svint8x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg2,)(base, 14, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_u8_vg4j11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( 
[[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_u8_vg4(uint32_t base, svuint8x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_u8,_vg4,)(base, 12, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za8_s8_vg4j10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za8_s8_vg4(uint32_t base, svint8x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za8,_s8,_vg4,)(base, 12, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_u8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_u8_vg4j11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_u8_vg4(uint32_t base, svuint8x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_u8,_vg4,)(base, 12, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za8_s8_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za8_s8_vg4j10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[VAL]], i64 48) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 12 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za8_s8_vg4(uint32_t base, svint8x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za8,_s8,_vg4,)(base, 12, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg2j12svuint16x2_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_u16_vg2(uint32_t base, svuint16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg2j14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg2j13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg2j11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_s16_vg2(uint32_t base, svint16x2_t val) { + 
SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_u16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg2j12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_u16_vg2(uint32_t base, svuint16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg2j14svbfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8bf16.nxv16bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_bf16_vg2(uint32_t base, svbfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_f16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg2j13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_f16_vg2(uint32_t base, svfloat16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg2,)(base, 6, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_s16_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 [[TMP2]], 
[[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg2j11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 6 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_s16_vg2(uint32_t base, svint16x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg2,)(base, 6, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_u16_vg4j12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call 
void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_u16_vg4(uint32_t base, svuint16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_u16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_hor_za16_bf16_vg4j14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_bf16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_f16_vg4( +// CHECK-NEXT: entry: +// 
CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_f16_vg4j13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_f16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = 
add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za16_s16_vg4j11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za16_s16_vg4(uint32_t base, svint16x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za16,_s16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_u16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_u16_vg4j12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail 
call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_u16_vg4(uint32_t base, svuint16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_u16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z30test_svwrite_ver_za16_bf16_vg4j14svbfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8bf16.nxv32bf16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 
[[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_bf16_vg4(uint32_t base, svbfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_bf16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_f16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_f16_vg4j13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_f16_vg4(uint32_t base, svfloat16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_f16,_vg4,)(base, 4, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za16_s16_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( 
[[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za16_s16_vg4j11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[VAL]], i64 24) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 4 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za16_s16_vg4(uint32_t base, svint16x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za16,_s16,_vg4,)(base, 4, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg2j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_u32_vg2(uint32_t base, svuint32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg2,)(base, 2, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg2j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg2,)(base, 2, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] 
= add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg2j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_s32_vg2(uint32_t base, svint32x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg2,)(base, 2, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_u32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg2j12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_u32_vg2(uint32_t base, svuint32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg2,)(base, 2, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: 
@test_svwrite_ver_za32_f32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg2j13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_f32_vg2(uint32_t base, svfloat32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg2,)(base, 2, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_s32_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg2j11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 2 +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_s32_vg2(uint32_t base, svint32x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg2,)(base, 2, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_u32_vg4j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_u32_vg4(uint32_t base, svuint32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_u32,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_f32_vg4j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_f32,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za32_s32_vg4j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za32_s32_vg4(uint32_t base, svint32x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za32,_s32,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_u32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_u32_vg4j12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// 
CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_u32_vg4(uint32_t base, svuint32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_u32,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_f32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_f32_vg4j13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_f32_vg4(uint32_t base, svfloat32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_f32,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za32_s32_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za32_s32_vg4j11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[VAL]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za32_s32_vg4(uint32_t base, svint32x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za32,_s32,_vg4,)(base, 0, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: 
ret void +// +void test_svwrite_hor_za64_u64_vg2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg2,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg2,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_s64_vg2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg2,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_u64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_u64_vg2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg2,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_f64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_f64_vg2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg2,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_s64_vg2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_s64_vg2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_s64_vg2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg2,)(base, 0, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) 
+// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_u64_vg4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_u64_vg4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_u64,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_f64_vg4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_f64,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_hor_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za64_s64_vg4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_hor_za64_s64_vg4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_hor_za64,_s64,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_u64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_u64_vg4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_u64_vg4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_u64,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_f64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_f64_vg4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_f64_vg4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_f64,_vg4,)(base, 0, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_ver_za64_s64_vg4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za64_s64_vg4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 [[BASE:%.*]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// 
CPP-CHECK-NEXT: ret void +// +void test_svwrite_ver_za64_s64_vg4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_ver_za64,_s64,_vg4,)(base, 0, val); +} + +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_u64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x2j12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_u64_vg1x2(uint32_t base, svuint64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x2,)(base, 7, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_f64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x2j13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_f64_vg1x2(uint32_t base, svfloat64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x2,)(base, 7, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_s64_vg1x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x2j11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 [[TMP2]], [[TMP0]], [[TMP1]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_s64_vg1x2(uint32_t base, svint64x2_t val) { + SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x2,)(base, 7, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_u64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_u64_vg1x4j12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_u64_vg1x4(uint32_t base, svuint64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_u64,_vg1x4,)(base, 7, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_f64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_f64_vg1x4j13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( 
[[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_f64_vg1x4(uint32_t base, svfloat64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_f64,_vg1x4,)(base, 7, val); +} +__attribute__((arm_streaming, arm_shared_za)) +// CHECK-LABEL: @test_svwrite_za64_s64_vg1x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_za64_s64_vg1x4j11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[VAL]], i64 6) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = add i32 [[BASE:%.*]], 7 +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 [[TMP4]], [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]]) +// CPP-CHECK-NEXT: ret void +// +void test_svwrite_za64_s64_vg1x4(uint32_t base, svint64x4_t val) { + SVE_ACLE_FUNC(svwrite_za64,_s64,_vg1x4,)(base, 7, val); +} diff --git a/clang/test/CodeGen/aarch64-sve-inline-asm-crash.c b/clang/test/CodeGen/aarch64-sve-inline-asm-crash.c --- a/clang/test/CodeGen/aarch64-sve-inline-asm-crash.c +++ b/clang/test/CodeGen/aarch64-sve-inline-asm-crash.c @@ -20,5 +20,17 @@ return ret ; } +__SVCount_t funcB1(__SVCount_t in) +{ + __SVCount_t ret ; + asm volatile ( + "mov %[ret].b, %[in].b \n" + : [ret] "=w" (ret) + : [in] "w" (in) + :); + + return ret ; +} + // CHECK: funcB1 // CHECK-ERROR: fatal error: error in backend: Cannot select diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_psel.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_psel.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_psel.c @@ -0,0 +1,82 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +// CHECK-LABEL: @test_svpsel_b8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[IDX:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z14test_svpsel_b8u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.psel.nxv16i1( [[P1:%.*]], [[P2:%.*]], i32 [[IDX:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpsel_b8(svbool_t p1, svbool_t p2, 
uint32_t idx) { + return svpsel_b8(p1, p2, idx); +} + +// CHECK-LABEL: @test_svpsel_b16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP2]]) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z15test_svpsel_b16u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv8i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP2]]) +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbool_t test_svpsel_b16(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_b16(p1, p2, idx); +} + +// CHECK-LABEL: @test_svpsel_b32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP2]]) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z15test_svpsel_b32u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( 
[[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv4i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP2]]) +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbool_t test_svpsel_b32(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_b32(p1, p2, idx); +} + +// CHECK-LABEL: @test_svpsel_b64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P1:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP2]]) +// CHECK-NEXT: ret [[TMP3]] +// +// CPP-CHECK-LABEL: @_Z15test_svpsel_b64u10__SVBool_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P1:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[P2:%.*]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.aarch64.sve.psel.nxv2i1( [[TMP0]], [[TMP1]], i32 [[IDX:%.*]]) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP2]]) +// CPP-CHECK-NEXT: ret [[TMP3]] +// +svbool_t test_svpsel_b64(svbool_t p1, svbool_t p2, uint32_t idx) { + return svpsel_b64(p1, p2, idx); +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_revd.c @@ -0,0 +1,390 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S 
-O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svrevd_s8_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( zeroinitializer, [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svrevd_s8_zu10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( zeroinitializer, [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svrevd_s8_z(svbool_t pg, svint8_t op) { + return SVE_ACLE_FUNC(svrevd, _s8, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s16_zu10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.revd.nxv8i16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t test_svrevd_s16_z(svbool_t pg, svint16_t op) { + return SVE_ACLE_FUNC(svrevd, _s16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s32_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s32_zu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svrevd_s32_z(svbool_t pg, svint32_t op) { + return SVE_ACLE_FUNC(svrevd, _s32, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s64_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s64_zu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svrevd_s64_z(svbool_t pg, svint64_t op) { + return SVE_ACLE_FUNC(svrevd, _s64, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u8_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( zeroinitializer, [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: 
@_Z16test_svrevd_u8_zu10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( zeroinitializer, [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svrevd_u8_z(svbool_t pg, svuint8_t op) { + return SVE_ACLE_FUNC(svrevd, _u8, _z, )(pg, op); +} +// CHECK-LABEL: @test_svrevd_u16_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u16_zu10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svrevd_u16_z(svbool_t pg, svuint16_t op) { + return SVE_ACLE_FUNC(svrevd, _u16, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u32_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u32_zu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svrevd_u32_z(svbool_t pg, svuint32_t op) { + return SVE_ACLE_FUNC(svrevd, _u32, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u64_z( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u64_zu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( zeroinitializer, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svrevd_u64_z(svbool_t pg, svuint64_t op) { + return SVE_ACLE_FUNC(svrevd, _u64, _z, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( [[INACTIVE:%.*]], [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svrevd_s8_mu10__SVInt8_tu10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( [[INACTIVE:%.*]], [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svrevd_s8_m(svint8_t inactive, svbool_t pg, svint8_t op) { + return SVE_ACLE_FUNC(svrevd, _s8, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_s16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s16_mu11__SVInt16_tu10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t 
test_svrevd_s16_m(svint16_t inactive, svbool_t pg, svint16_t op) { + return SVE_ACLE_FUNC(svrevd, _s16, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_s32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s32_mu11__SVInt32_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svrevd_s32_m(svint32_t inactive, svbool_t pg, svint32_t op) { + return SVE_ACLE_FUNC(svrevd, _s32, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_s64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s64_mu11__SVInt64_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svrevd_s64_m(svint64_t inactive, svbool_t pg, svint64_t op) { + return SVE_ACLE_FUNC(svrevd, _s64, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_u8_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( [[INACTIVE:%.*]], [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// 
CPP-CHECK-LABEL: @_Z16test_svrevd_u8_mu11__SVUint8_tu10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( [[INACTIVE:%.*]], [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svrevd_u8_m(svuint8_t inactive, svbool_t pg, svuint8_t op) { + return SVE_ACLE_FUNC(svrevd, _u8, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_u16_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u16_mu12__SVUint16_tu10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svrevd_u16_m(svuint16_t inactive, svbool_t pg, svuint16_t op) { + return SVE_ACLE_FUNC(svrevd, _u16, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_u32_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u32_mu12__SVUint32_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svrevd_u32_m(svuint32_t inactive, svbool_t pg, svuint32_t op) { + return 
SVE_ACLE_FUNC(svrevd, _u32, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_u64_m( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u64_mu12__SVUint64_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( [[INACTIVE:%.*]], [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svrevd_u64_m(svuint64_t inactive, svbool_t pg, svuint64_t op) { + return SVE_ACLE_FUNC(svrevd, _u64, _m, )(inactive, pg, op); +} + +// CHECK-LABEL: @test_svrevd_s8_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( undef, [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svrevd_s8_xu10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( undef, [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svrevd_s8_x(svbool_t pg, svint8_t op) { + return SVE_ACLE_FUNC(svrevd, _s8, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s16_xu10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.revd.nxv8i16( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint16_t test_svrevd_s16_x(svbool_t pg, svint16_t op) { + return SVE_ACLE_FUNC(svrevd, _s16, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s32_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s32_xu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint32_t test_svrevd_s32_x(svbool_t pg, svint32_t op) { + return SVE_ACLE_FUNC(svrevd, _s32, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_s64_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_s64_xu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svint64_t test_svrevd_s64_x(svbool_t pg, svint64_t op) { + return SVE_ACLE_FUNC(svrevd, _s64, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u8_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( undef, [[PG:%.*]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svrevd_u8_xu10__SVBool_tu11__SVUint8_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.revd.nxv16i8( undef, [[PG:%.*]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svrevd_u8_x(svbool_t pg, svuint8_t op) { + return SVE_ACLE_FUNC(svrevd, _u8, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u16_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u16_xu10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv8i16( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint16_t test_svrevd_u16_x(svbool_t pg, svuint16_t op) { + return SVE_ACLE_FUNC(svrevd, _u16, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u32_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u32_xu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv4i32( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint32_t test_svrevd_u32_x(svbool_t pg, svuint32_t op) { + return SVE_ACLE_FUNC(svrevd, _u32, _x, )(pg, op); +} + +// CHECK-LABEL: @test_svrevd_u64_x( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.aarch64.sve.revd.nxv2i64( undef, [[TMP0]], [[OP:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z17test_svrevd_u64_xu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.revd.nxv2i64( undef, [[TMP0]], [[OP:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svuint64_t test_svrevd_u64_x(svbool_t pg, svuint64_t op) { + return SVE_ACLE_FUNC(svrevd, _u64, _x, )(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sclamp.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sclamp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_sclamp.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svsclamp_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv16i8( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svsclamp_s8u10__SVInt8_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv16i8( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint8_t test_svsclamp_s8(svint8_t op1, svint8_t op2, svint8_t op3) { + return SVE_ACLE_FUNC(svsclamp, _s8, , )(op1, op2, op3); +} + +// CHECK-LABEL: @test_svsclamp_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv8i16( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svsclamp_s16u11__SVInt16_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv8i16( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint16_t test_svsclamp_s16(svint16_t op1, svint16_t op2, svint16_t op3) { + return SVE_ACLE_FUNC(svsclamp, _s16, , )(op1, op2, op3); +} + +// CHECK-LABEL: @test_svsclamp_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv4i32( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svsclamp_s32u11__SVInt32_tu11__SVInt32_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv4i32( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint32_t test_svsclamp_s32(svint32_t op1, svint32_t op2, svint32_t op3) { + return SVE_ACLE_FUNC(svsclamp, _s32, , )(op1, op2, op3); +} + +// CHECK-LABEL: 
@test_svsclamp_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv2i64( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svsclamp_s64u11__SVInt64_tu11__SVInt64_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.sclamp.nxv2i64( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svint64_t test_svsclamp_s64(svint64_t op1, svint64_t op2, svint64_t op3) { + return SVE_ACLE_FUNC(svsclamp, _s64, , )(op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_uclamp.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_uclamp.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_uclamp.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu \ +// RUN: -target-feature +sme -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +// CHECK-LABEL: @test_svuclamp_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv16i8( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svuclamp_u8u11__SVUint8_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv16i8( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint8_t test_svuclamp_u8(svuint8_t op1, svuint8_t op2, svuint8_t op3) { + return SVE_ACLE_FUNC(svuclamp, _u8, , )(op1, op2, op3); +} + +// CHECK-LABEL: @test_svuclamp_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv8i16( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svuclamp_u16u12__SVUint16_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv8i16( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint16_t test_svuclamp_u16(svuint16_t op1, svuint16_t op2, svuint16_t op3) { + return SVE_ACLE_FUNC(svuclamp, _u16, , )(op1, op2, op3); +} + +// CHECK-LABEL: @test_svuclamp_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv4i32( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svuclamp_u32u12__SVUint32_tu12__SVUint32_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv4i32( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint32_t test_svuclamp_u32(svuint32_t op1, svuint32_t op2, svuint32_t op3) { + return SVE_ACLE_FUNC(svuclamp, _u32, , )(op1, op2, op3); +} + +// 
CHECK-LABEL: @test_svuclamp_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv2i64( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z17test_svuclamp_u64u12__SVUint64_tu12__SVUint64_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.uclamp.nxv2i64( [[OP1:%.*]], [[OP2:%.*]], [[OP3:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svuint64_t test_svuclamp_u64(svuint64_t op1, svuint64_t op2, svuint64_t op3) { + return SVE_ACLE_FUNC(svuclamp, _u64, , )(op1, op2, op3); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ld1.c @@ -0,0 +1,1294 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z16test_svld1_u8_x2u11__SVCount_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint8x2_t test_svld1_u8_x2(svcount_t pn, const uint8_t *base) +{ + return SVE_ACLE_FUNC(svld1b,_u8,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) 
+// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u16_x2u11__SVCount_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svld1_u16_x2(svcount_t pn, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_u16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u32_x2u11__SVCount_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t 
test_svld1_u32_x2(svcount_t pn, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_u32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u64_x2u11__SVCount_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svld1_u64_x2(svcount_t pn, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_u64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 
16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z16test_svld1_u8_x4u11__SVCount_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svld1_u8_x4(svcount_t pn, const uint8_t *base) +{ + return SVE_ACLE_FUNC(svld1b,_u8,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u16_x4u11__SVCount_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x4_t test_svld1_u16_x4(svcount_t pn, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_u16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , 
, , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u32_x4u11__SVCount_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint32x4_t test_svld1_u32_x4(svcount_t pn, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_u32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, 
[[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_u64_x4u11__SVCount_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svld1_u64_x4(svcount_t pn, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_u64,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z16test_svld1_s8_x2u11__SVCount_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svld1_s8_x2(svcount_t pn, const int8_t *base) +{ + return SVE_ACLE_FUNC(svld1b,_s8,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_s16_x2u11__SVCount_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svld1_s16_x2(svcount_t pn, const int16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_s16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_s32_x2u11__SVCount_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svld1_s32_x2(svcount_t pn, const int32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_s32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_s64_x2u11__SVCount_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svld1_s64_x2(svcount_t pn, const int64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_s64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z16test_svld1_s8_x4u11__SVCount_tPKa( 
+// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint8x4_t test_svld1_s8_x4(svcount_t pn, const int8_t *base) +{ + return SVE_ACLE_FUNC(svld1b,_s8,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// 
CPP-CHECK-LABEL: @_Z17test_svld1_s16_x4u11__SVCount_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x4_t test_svld1_s16_x4(svcount_t pn, const int16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_s16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( 
[[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_s32_x4u11__SVCount_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svld1_s32_x4(svcount_t pn, const int32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_s32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: 
[[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_s64_x4u11__SVCount_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svld1_s64_x4(svcount_t pn, const int64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_s64,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f16_x2u11__SVCount_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat16x2_t test_svld1_f16_x2(svcount_t pn, const float16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_f16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f32_x2u11__SVCount_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_svld1_f32_x2(svcount_t pn, const float32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_f32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f64_x2( +// 
CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f64_x2u11__SVCount_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svld1_f64_x2(svcount_t pn, const float64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_f64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// 
CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f16_x4u11__SVCount_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x4_t test_svld1_f16_x4(svcount_t pn, const float16_t *base) +{ + return SVE_ACLE_FUNC(svld1h,_f16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call 
@llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f32_x4u11__SVCount_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat32x4_t test_svld1_f32_x4(svcount_t pn, const float32_t *base) +{ + return SVE_ACLE_FUNC(svld1w,_f32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , 
, } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z17test_svld1_f64_x4u11__SVCount_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svld1_f64_x4(svcount_t pn, const float64_t *base) +{ + return SVE_ACLE_FUNC(svld1d,_f64,_x4,)(pn, base); +} + + +// == VNUM variants == + + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// 
CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x2u11__SVCount_tPKhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8x2_t test_svld1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1b_vnum,_u8,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x2u11__SVCount_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16x2_t test_svld1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_u16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x2u11__SVCount_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint32x2_t test_svld1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) +{ + return 
SVE_ACLE_FUNC(svld1w_vnum,_u32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x2u11__SVCount_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint64x2_t test_svld1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_u64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_u8_x4u11__SVCount_tPKhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint8x4_t test_svld1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1b_vnum,_u8,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 
[[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u16_x4u11__SVCount_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint16x4_t test_svld1_vnum_u16_x4(svcount_t pn, const 
uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_u16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u32_x4u11__SVCount_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// 
CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint32x4_t test_svld1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1w_vnum,_u32,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_u64_x4u11__SVCount_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail 
call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint64x4_t test_svld1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_u64,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x2u11__SVCount_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8x2_t 
test_svld1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1b_vnum,_s8,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x2u11__SVCount_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16x2_t test_svld1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_s16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] 
= extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x2u11__SVCount_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint32x2_t test_svld1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1w_vnum,_s32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x2u11__SVCount_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint64x2_t test_svld1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_s64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z21test_svld1_vnum_s8_x4u11__SVCount_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint8x4_t test_svld1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1b_vnum,_s8,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: 
ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s16_x4u11__SVCount_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint16x4_t test_svld1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_s16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail 
call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s32_x4u11__SVCount_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint32x4_t test_svld1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1w_vnum,_s32,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: 
[[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_s64_x4u11__SVCount_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint64x4_t test_svld1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_s64,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } 
@llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x2u11__SVCount_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat16x2_t test_svld1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_f16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: 
@_Z22test_svld1_vnum_f32_x2u11__SVCount_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat32x2_t test_svld1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1w_vnum,_f32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x2u11__SVCount_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat64x2_t test_svld1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_f64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f16_x4u11__SVCount_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = 
tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat16x4_t test_svld1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1h_vnum,_f16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f32_x4u11__SVCount_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: 
[[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat32x4_t test_svld1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1w_vnum,_f32,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svld1_vnum_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z22test_svld1_vnum_f64_x4u11__SVCount_tPKdl( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat64x4_t test_svld1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svld1d_vnum,_f64,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ldnt1.c @@ -0,0 +1,1294 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 
-Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x2u11__SVCount_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint8x2_t test_svldnt1_u8_x2(svcount_t pn, const uint8_t *base) +{ + return SVE_ACLE_FUNC(svldnt1b,_u8,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x2u11__SVCount_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint16x2_t test_svldnt1_u16_x2(svcount_t pn, const uint16_t *base) +{ + return SVE_ACLE_FUNC(svldnt1h,_u16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x2u11__SVCount_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], 
ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint32x2_t test_svldnt1_u32_x2(svcount_t pn, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_u32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x2u11__SVCount_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svuint64x2_t test_svldnt1_u64_x2(svcount_t pn, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_u64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail 
call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z18test_svldnt1_u8_x4u11__SVCount_tPKh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint8x4_t test_svldnt1_u8_x4(svcount_t pn, const uint8_t *base) +{ + return SVE_ACLE_FUNC(svldnt1b,_u8,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: 
@test_svldnt1_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u16_x4u11__SVCount_tPKt( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint16x4_t test_svldnt1_u16_x4(svcount_t pn, const uint16_t *base) +{ + return 
SVE_ACLE_FUNC(svldnt1h,_u16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u32_x4u11__SVCount_tPKj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// 
+svuint32x4_t test_svldnt1_u32_x4(svcount_t pn, const uint32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_u32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_u64_x4u11__SVCount_tPKm( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( 
[[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svuint64x4_t test_svldnt1_u64_x4(svcount_t pn, const uint64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_u64,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x2u11__SVCount_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint8x2_t test_svldnt1_s8_x2(svcount_t pn, const int8_t *base) +{ + return SVE_ACLE_FUNC(svldnt1b,_s8,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: 
[[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s16_x2u11__SVCount_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint16x2_t test_svldnt1_s16_x2(svcount_t pn, const int16_t *base) +{ + return SVE_ACLE_FUNC(svldnt1h,_s16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x2u11__SVCount_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call 
@llvm.vector.insert.nxv8i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint32x2_t test_svldnt1_s32_x2(svcount_t pn, const int32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_s32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x2u11__SVCount_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svint64x2_t test_svldnt1_s64_x2(svcount_t pn, const int64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_s64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = 
extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z18test_svldnt1_s8_x4u11__SVCount_tPKa( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP2]], [[TMP3]], i64 16) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP4]], [[TMP5]], i64 32) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP6]], [[TMP7]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint8x4_t test_svldnt1_s8_x4(svcount_t pn, const int8_t *base) +{ + return SVE_ACLE_FUNC(svldnt1b,_s8,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s16_x4u11__SVCount_tPKs( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint16x4_t test_svldnt1_s16_x4(svcount_t pn, const int16_t *base) +{ + return SVE_ACLE_FUNC(svldnt1h,_s16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = 
extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s32_x4u11__SVCount_tPKi( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint32x4_t test_svldnt1_s32_x4(svcount_t pn, const int32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_s32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } 
@llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_s64_x4u11__SVCount_tPKl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svint64x4_t test_svldnt1_s64_x4(svcount_t pn, const int64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_s64,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_f16_x2( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x2u11__SVCount_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat16x2_t test_svldnt1_f16_x2(svcount_t pn, const float16_t *base) +{ + return SVE_ACLE_FUNC(svldnt1h,_f16,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x2u11__SVCount_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } 
@llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat32x2_t test_svldnt1_f32_x2(svcount_t pn, const float32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_f32,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: ret [[TMP4]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x2u11__SVCount_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP4]] +// +svfloat64x2_t test_svldnt1_f64_x2(svcount_t pn, const float64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_f64,_x2,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: 
@test_svldnt1_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f16_x4u11__SVCount_tPKDh( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP2]], [[TMP3]], i64 8) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP4]], [[TMP5]], i64 16) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP6]], [[TMP7]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat16x4_t test_svldnt1_f16_x4(svcount_t pn, const float16_t *base) +{ + return 
SVE_ACLE_FUNC(svldnt1h,_f16,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f32_x4u11__SVCount_tPKf( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP2]], [[TMP3]], i64 4) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP4]], [[TMP5]], i64 8) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP6]], [[TMP7]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP8]] +// 
+svfloat32x4_t test_svldnt1_f32_x4(svcount_t pn, const float32_t *base) +{ + return SVE_ACLE_FUNC(svldnt1w,_f32,_x4,)(pn, base); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP6]], [[TMP7]], i64 6) +// CHECK-NEXT: ret [[TMP8]] +// +// CPP-CHECK-LABEL: @_Z19test_svldnt1_f64_x4u11__SVCount_tPKd( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = extractvalue { , , , } [[TMP0]], 0 +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP1]], i64 0) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { , , , } [[TMP0]], 1 +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP2]], [[TMP3]], i64 2) +// CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { , , , } [[TMP0]], 2 +// CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP4]], [[TMP5]], i64 4) +// CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { , , , } [[TMP0]], 3 +// CPP-CHECK-NEXT: [[TMP8:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( 
[[TMP6]], [[TMP7]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP8]] +// +svfloat64x4_t test_svldnt1_f64_x4(svcount_t pn, const float64_t *base) +{ + return SVE_ACLE_FUNC(svldnt1d,_f64,_x4,)(pn, base); +} + + +// == VNUM variants == + + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x2u11__SVCount_tPKhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint8x2_t test_svldnt1_vnum_u8_x2(svcount_t pn, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1b_vnum,_u8,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } 
@llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x2u11__SVCount_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint16x2_t test_svldnt1_vnum_u16_x2(svcount_t pn, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_u16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// 
CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u32_x2u11__SVCount_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint32x2_t test_svldnt1_vnum_u32_x2(svcount_t pn, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_u32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x2u11__SVCount_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( 
poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svuint64x2_t test_svldnt1_vnum_u64_x2(svcount_t pn, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_u64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_u8_x4u11__SVCount_tPKhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// 
CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint8x4_t test_svldnt1_vnum_u8_x4(svcount_t pn, const uint8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1b_vnum,_u8,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u16_x4u11__SVCount_tPKtl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], 
ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint16x4_t test_svldnt1_vnum_u16_x4(svcount_t pn, const uint16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_u16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: 
@_Z24test_svldnt1_vnum_u32_x4u11__SVCount_tPKjl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint32x4_t test_svldnt1_vnum_u32_x4(svcount_t pn, const uint32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_u32,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_u64_x4u11__SVCount_tPKml( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svuint64x4_t test_svldnt1_vnum_u64_x4(svcount_t pn, const uint64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_u64,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: 
[[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x2u11__SVCount_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint8x2_t test_svldnt1_vnum_s8_x2(svcount_t pn, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1b_vnum,_s8,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x2u11__SVCount_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } 
@llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint16x2_t test_svldnt1_vnum_s16_x2(svcount_t pn, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_s16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x2u11__SVCount_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] 
+// +svint32x2_t test_svldnt1_vnum_s32_x2(svcount_t pn, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_s32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x2u11__SVCount_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svint64x2_t test_svldnt1_vnum_s64_x2(svcount_t pn, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_s64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr 
[[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z23test_svldnt1_vnum_s8_x4u11__SVCount_tPKal( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP3]], [[TMP4]], i64 16) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP5]], [[TMP6]], i64 32) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv64i8.nxv16i8( [[TMP7]], [[TMP8]], i64 48) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint8x4_t test_svldnt1_vnum_s8_x4(svcount_t pn, const int8_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1b_vnum,_s8,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// 
CHECK-LABEL: @test_svldnt1_vnum_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s16_x4u11__SVCount_tPKsl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32i16.nxv8i16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call 
@llvm.vector.insert.nxv32i16.nxv8i16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint16x4_t test_svldnt1_vnum_s16_x4(svcount_t pn, const int16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_s16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s32_x4u11__SVCount_tPKil( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: 
[[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16i32.nxv4i32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint32x4_t test_svldnt1_vnum_s32_x4(svcount_t pn, const int32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_s32,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_s64_x4u11__SVCount_tPKll( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.insert.nxv8i64.nxv2i64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8i64.nxv2i64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svint64x4_t test_svldnt1_vnum_s64_x4(svcount_t pn, const int64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_s64,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x2u11__SVCount_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat16x2_t test_svldnt1_vnum_f16_x2(svcount_t pn, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_f16,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x2u11__SVCount_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat32x2_t test_svldnt1_vnum_f32_x2(svcount_t pn, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_f32,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: ret [[TMP5]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x2u11__SVCount_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv4f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: ret [[TMP5]] +// +svfloat64x2_t test_svldnt1_vnum_f64_x2(svcount_t pn, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_f64,_x2,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: 
[[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f16_x4u11__SVCount_tPKDhl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP3]], [[TMP4]], i64 8) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP5]], [[TMP6]], i64 16) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv32f16.nxv8f16( [[TMP7]], [[TMP8]], i64 24) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat16x4_t test_svldnt1_vnum_f16_x4(svcount_t pn, const float16_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1h_vnum,_f16,_x4,)(pn, base, vnum); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount 
[[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f32_x4u11__SVCount_tPKfl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP3]], [[TMP4]], i64 4) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP5]], [[TMP6]], i64 8) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv16f32.nxv4f32( [[TMP7]], [[TMP8]], i64 12) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat32x4_t test_svldnt1_vnum_f32_x4(svcount_t pn, const float32_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1w_vnum,_f32,_x4,)(pn, base, vnum); +} + 
+__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svldnt1_vnum_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CHECK-NEXT: [[TMP9:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CHECK-NEXT: ret [[TMP9]] +// +// CPP-CHECK-LABEL: @_Z24test_svldnt1_vnum_f64_x4u11__SVCount_tPKdl( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = extractvalue { , , , } [[TMP1]], 0 +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( poison, [[TMP2]], i64 0) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = extractvalue { , , , } [[TMP1]], 1 +// CPP-CHECK-NEXT: [[TMP5:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP3]], [[TMP4]], i64 2) +// CPP-CHECK-NEXT: [[TMP6:%.*]] = extractvalue { , , , } [[TMP1]], 2 +// CPP-CHECK-NEXT: [[TMP7:%.*]] = tail call @llvm.vector.insert.nxv8f64.nxv2f64( [[TMP5]], [[TMP6]], i64 4) +// CPP-CHECK-NEXT: [[TMP8:%.*]] = extractvalue { , , , } [[TMP1]], 3 +// CPP-CHECK-NEXT: [[TMP9:%.*]] = tail call 
@llvm.vector.insert.nxv8f64.nxv2f64( [[TMP7]], [[TMP8]], i64 6) +// CPP-CHECK-NEXT: ret [[TMP9]] +// +svfloat64x4_t test_svldnt1_vnum_f64_x4(svcount_t pn, const float64_t *base, int64_t vnum) +{ + return SVE_ACLE_FUNC(svldnt1d_vnum,_f64,_x4,)(pn, base, vnum); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_pext.c @@ -0,0 +1,138 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c8_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount [[C:%.*]], i32 0) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_0(svcount_t c) { + return svpext_lane_c8(c, 0); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c8_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount [[C:%.*]], i32 3) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z21test_svpext_lane_c8_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount 
[[C:%.*]], i32 3) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +svbool_t test_svpext_lane_c8_3(svcount_t c) { + return svpext_lane_c8(c, 3); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c16_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_0(svcount_t c) { + return svpext_lane_c16(c, 0); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c16_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c16_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv8i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c16_3(svcount_t c) { + return svpext_lane_c16(c, 3); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c32_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: 
@_Z22test_svpext_lane_c32_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_0(svcount_t c) { + return svpext_lane_c32(c, 0); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c32_3( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c32_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv4i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c32_3(svcount_t c) { + return svpext_lane_c32(c, 3); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c64_0( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount [[C:%.*]], i32 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_0u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount [[C:%.*]], i32 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_0(svcount_t c) { + return svpext_lane_c64(c, 0); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svpext_lane_c64_3( +// CHECK-NEXT: entry: 
+// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount [[C:%.*]], i32 3) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z22test_svpext_lane_c64_3u11__SVCount_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount [[C:%.*]], i32 3) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.to.svbool.nxv2i1( [[TMP0]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +svbool_t test_svpext_lane_c64_3(svcount_t c) { + return svpext_lane_c64(c, 3); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ptrue.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ptrue.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_ptrue.c @@ -0,0 +1,66 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK + +#include + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svptrue_c8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c8() +// CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z15test_svptrue_c8v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c8() +// CPP-CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +svcount_t test_svptrue_c8(void) { + return svptrue_c8(); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svptrue_c16( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c16() +// CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svptrue_c16v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c16() +// CPP-CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +svcount_t test_svptrue_c16(void) { + return svptrue_c16(); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svptrue_c32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c32() +// CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svptrue_c32v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c32() +// CPP-CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +svcount_t test_svptrue_c32(void) { + return svptrue_c32(); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svptrue_c64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c64() +// CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z16test_svptrue_c64v( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call aarch64_svcount @llvm.aarch64.sve.ptrue.c64() +// CPP-CHECK-NEXT: ret aarch64_svcount [[TMP0]] +// +svcount_t test_svptrue_c64(void) { + return svptrue_c64(); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_st1.c @@ -0,0 +1,1042 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 
-D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst1_u8_x2u11__SVCount_tPh11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) +{ + return SVE_ACLE_FUNC(svst1b,_u8,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( 
[[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u16_x2u11__SVCount_tPt12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h,_u16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u32_x2u11__SVCount_tPj12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w,_u32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: 
@test_svst1_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u64_x2u11__SVCount_tPm12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d,_u64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst1_u8_x4u11__SVCount_tPh11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) +{ + return SVE_ACLE_FUNC(svst1b,_u8,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u16_x4u11__SVCount_tPt12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h,_u16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// 
CHECK-LABEL: @test_svst1_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u32_x4u11__SVCount_tPj12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) +{ + return SVE_ACLE_FUNC(svst1w,_u32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP0]], 
[[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_u64_x4u11__SVCount_tPm12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d,_u64,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst1_s8_x2u11__SVCount_tPa10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) +{ + return SVE_ACLE_FUNC(svst1b,_s8,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) 
+// CHECK-LABEL: @test_svst1_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s16_x2u11__SVCount_tPs11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h,_s16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s32_x2u11__SVCount_tPi11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s32_x2(svcount_t pn, int32_t *base, 
svint32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w,_s32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s64_x2u11__SVCount_tPl11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d,_s64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z16test_svst1_s8_x4u11__SVCount_tPa10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) +{ + return SVE_ACLE_FUNC(svst1b,_s8,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s16_x4u11__SVCount_tPs11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s16_x4(svcount_t pn, int16_t *base, 
svint16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h,_s16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s32_x4u11__SVCount_tPi11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) +{ + return SVE_ACLE_FUNC(svst1w,_s32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_s64_x4u11__SVCount_tPl11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d,_s64,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f16_x2u11__SVCount_tPDh13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svst1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h,_f16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f32_x2u11__SVCount_tPf13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w,_f32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2f64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f64_x2u11__SVCount_tPd13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st1.pn.vg2.nxv2f64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d,_f64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f16_x4u11__SVCount_tPDh13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h,_f16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f32_x4u11__SVCount_tPf13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) +{ + return SVE_ACLE_FUNC(svst1w,_f32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z17test_svst1_f64_x4u11__SVCount_tPd13svfloat64x4_t( +// 
CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d,_f64,_x4,)(pn, base, v); +} + + +// == VNUM variants == + + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) +{ + return SVE_ACLE_FUNC(svst1b_vnum,_u8,_x2,)(pn, 
base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_u16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] 
+// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w_vnum,_u32,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_u64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) +{ + return SVE_ACLE_FUNC(svst1b_vnum,_u8,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( 
[[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_u16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) +{ + return SVE_ACLE_FUNC(svst1w_vnum,_u32,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail 
call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_u64,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) +{ + return SVE_ACLE_FUNC(svst1b_vnum,_s8,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call 
@llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_s16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.st1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w_vnum,_s32,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_s64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], 
i64 32) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svst1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) +{ + return SVE_ACLE_FUNC(svst1b_vnum,_s8,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z22test_svst1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_s16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) +{ + return SVE_ACLE_FUNC(svst1w_vnum,_s32,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( 
[[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_s64,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_f16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 
0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) +{ + return SVE_ACLE_FUNC(svst1w_vnum,_f32,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2f64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) 
+// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg2.nxv2f64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_f64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], 
[[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) +{ + return SVE_ACLE_FUNC(svst1h_vnum,_f16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) +{ + 
return SVE_ACLE_FUNC(svst1w_vnum,_f32,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svst1_vnum_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svst1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svst1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) +{ + return SVE_ACLE_FUNC(svst1d_vnum,_f64,_x4,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c 
b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_stnt1.c @@ -0,0 +1,1042 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME2 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x2u11__SVCount_tPh11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u8_x2(svcount_t pn, uint8_t *base, svuint8x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1b,_u8,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x2u11__SVCount_tPt12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u16_x2(svcount_t pn, uint16_t *base, svuint16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_u16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x2u11__SVCount_tPj12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u32_x2(svcount_t pn, uint32_t *base, svuint32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_u32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x2u11__SVCount_tPm12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u64_x2(svcount_t pn, uint64_t *base, svuint64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_u64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svstnt1_u8_x4u11__SVCount_tPh11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u8_x4(svcount_t pn, uint8_t *base, svuint8x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1b,_u8,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u16_x4u11__SVCount_tPt12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u16_x4(svcount_t pn, uint16_t *base, svuint16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_u16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], 
ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u32_x4u11__SVCount_tPj12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u32_x4(svcount_t pn, uint32_t *base, svuint32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_u32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_u64_x4u11__SVCount_tPm12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_u64_x4(svcount_t pn, uint64_t *base, svuint64x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_u64,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x2u11__SVCount_tPa10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s8_x2(svcount_t pn, int8_t *base, svint8x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1b,_s8,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x2u11__SVCount_tPs11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s16_x2(svcount_t pn, int16_t *base, svint16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_s16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x2u11__SVCount_tPi11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s32_x2(svcount_t pn, int32_t *base, svint32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_s32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// 
CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x2u11__SVCount_tPl11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s64_x2(svcount_t pn, int64_t *base, svint64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_s64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z18test_svstnt1_s8_x4u11__SVCount_tPa10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svstnt1_s8_x4(svcount_t pn, int8_t *base, svint8x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1b,_s8,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s16_x4u11__SVCount_tPs11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s16_x4(svcount_t pn, int16_t *base, svint16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_s16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) 
+// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s32_x4u11__SVCount_tPi11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s32_x4(svcount_t pn, int32_t *base, svint32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_s32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_s64_x4u11__SVCount_tPl11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: 
[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_s64_x4(svcount_t pn, int64_t *base, svint64x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_s64,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x2u11__SVCount_tPDh13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f16_x2(svcount_t pn, float16_t *base, svfloat16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_f16,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x2u11__SVCount_tPf13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f32_x2(svcount_t pn, float32_t *base, svfloat32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_f32,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x2u11__SVCount_tPd13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64( [[TMP0]], [[TMP1]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f64_x2(svcount_t pn, float64_t *base, svfloat64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_f64,_x2,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( 
[[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_f16_x4u11__SVCount_tPDh13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f16_x4(svcount_t pn, float16_t *base, svfloat16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h,_f16,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// 
CPP-CHECK-LABEL: @_Z19test_svstnt1_f32_x4u11__SVCount_tPf13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f32_x4(svcount_t pn, float32_t *base, svfloat32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w,_f32,_x4,)(pn, base, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svstnt1_f64_x4u11__SVCount_tPd13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64( [[TMP0]], [[TMP1]], [[TMP2]], [[TMP3]], aarch64_svcount [[PN:%.*]], ptr [[BASE:%.*]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_f64_x4(svcount_t pn, float64_t *base, svfloat64x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1d,_f64,_x4,)(pn, base, v); +} + + +// == VNUM variants == + + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x2u11__SVCount_tPhl11svuint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u8_x2(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1b_vnum,_u8,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// 
CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x2u11__SVCount_tPtl12svuint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u16_x2(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_u16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x2u11__SVCount_tPjl12svuint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svstnt1_vnum_u32_x2(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_u32,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x2u11__SVCount_tPml12svuint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u64_x2(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d_vnum,_u64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: 
tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_u8_x4u11__SVCount_tPhl11svuint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u8_x4(svcount_t pn, uint8_t *base, int64_t vnum, svuint8x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1b_vnum,_u8,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u16_x4u11__SVCount_tPtl12svuint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u16_x4(svcount_t pn, uint16_t *base, int64_t vnum, svuint16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_u16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u32_x4u11__SVCount_tPjl12svuint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call 
@llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u32_x4(svcount_t pn, uint32_t *base, int64_t vnum, svuint32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_u32,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_u64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_u64_x4u11__SVCount_tPml12svuint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// 
CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_u64_x4(svcount_t pn, uint64_t *base, int64_t vnum, svuint64x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1d_vnum,_u64,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s8_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x2u11__SVCount_tPal10svint8x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv32i8( [[V]], i64 16) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s8_x2(svcount_t pn, int8_t *base, int64_t vnum, svint8x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1b_vnum,_s8,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// 
CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x2u11__SVCount_tPsl11svint16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv16i16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s16_x2(svcount_t pn, int16_t *base, int64_t vnum, svint16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_s16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x2u11__SVCount_tPil11svint32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv8i32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s32_x2(svcount_t pn, int32_t *base, int64_t vnum, svint32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_s32,_x2,)(pn, base, vnum, v); +} + 
+__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x2u11__SVCount_tPll11svint64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv4i64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s64_x2(svcount_t pn, int64_t *base, int64_t vnum, svint64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d_vnum,_s64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s8_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void 
+// +// CPP-CHECK-LABEL: @_Z23test_svstnt1_vnum_s8_x4u11__SVCount_tPal10svint8x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 32) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv16i8.nxv64i8( [[V]], i64 48) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s8_x4(svcount_t pn, int8_t *base, int64_t vnum, svint8x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1b_vnum,_s8,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s16_x4u11__SVCount_tPsl11svint16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V:%.*]], i64 0) +// 
CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8i16.nxv32i16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s16_x4(svcount_t pn, int16_t *base, int64_t vnum, svint16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_s16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s32_x4u11__SVCount_tPil11svint32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4i32.nxv16i32( [[V]], i64 12) +// 
CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s32_x4(svcount_t pn, int32_t *base, int64_t vnum, svint32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_s32,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_s64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_s64_x4u11__SVCount_tPll11svint64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[VNUM:%.*]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2i64.nxv8i64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_s64_x4(svcount_t pn, int64_t *base, int64_t vnum, svint64x4_t v) +{ + return 
SVE_ACLE_FUNC(svstnt1d_vnum,_s64,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x2u11__SVCount_tPDhd13svfloat16x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[V]], i64 8) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_f16_x2(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_f16,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f32_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32( [[TMP1]], [[TMP2]], 
aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x2u11__SVCount_tPfd13svfloat32x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv8f32( [[V]], i64 4) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_f32_x2(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_f32,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f64_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x2u11__SVCount_tPdd13svfloat64x2_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv4f64( [[V]], i64 2) +// CPP-CHECK-NEXT: tail call void 
@llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64( [[TMP1]], [[TMP2]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_f64_x2(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x2_t v) +{ + return SVE_ACLE_FUNC(svstnt1d_vnum,_f64,_x2,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f16_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f16_x4u11__SVCount_tPDhd13svfloat16x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 16) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv32f16( [[V]], i64 24) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void 
test_svstnt1_vnum_f16_x4(svcount_t pn, float16_t *base, float64_t vnum, svfloat16x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1h_vnum,_f16,_x4,)(pn, base, vnum, v); +} + +__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f32_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f32_x4u11__SVCount_tPfd13svfloat32x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 8) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv4f32.nxv16f32( [[V]], i64 12) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_f32_x4(svcount_t pn, float32_t *base, float64_t vnum, svfloat32x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1w_vnum,_f32,_x4,)(pn, base, vnum, v); +} + 
+__attribute__((arm_streaming)) +// CHECK-LABEL: @test_svstnt1_vnum_f64_x4( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z24test_svstnt1_vnum_f64_x4u11__SVCount_tPdd13svfloat64x4_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[CONV:%.*]] = fptosi double [[VNUM:%.*]] to i64 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = getelementptr , ptr [[BASE:%.*]], i64 [[CONV]] +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V:%.*]], i64 0) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 2) +// CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 4) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call @llvm.vector.extract.nxv2f64.nxv8f64( [[V]], i64 6) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64( [[TMP1]], [[TMP2]], [[TMP3]], [[TMP4]], aarch64_svcount [[PN:%.*]], ptr [[TMP0]]) +// CPP-CHECK-NEXT: ret void +// +void test_svstnt1_vnum_f64_x4(svcount_t pn, float64_t *base, float64_t vnum, svfloat64x4_t v) +{ + return SVE_ACLE_FUNC(svstnt1d_vnum,_f64,_x4,)(pn, base, vnum, v); +} diff --git a/clang/test/CodeGen/svboolx2_t.cpp b/clang/test/CodeGen/svboolx2_t.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/svboolx2_t.cpp @@ -0,0 +1,31 @@ +// NOTE: 
Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: @_Z3foo10svboolx2_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca , align 2 +// CHECK-NEXT: store [[ARG:%.*]], ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: ret [[TMP0]] +// +__clang_svboolx2_t foo(__clang_svboolx2_t arg) { return arg; } + +__clang_svboolx2_t bar(); +// CHECK-LABEL: @_Z4foo2v( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CALL:%.*]] = call @_Z3barv() +// CHECK-NEXT: ret [[CALL]] +// +__clang_svboolx2_t foo2() { return bar(); } + +__clang_svboolx2_t bar2(__clang_svboolx2_t); +// CHECK-LABEL: @_Z4foo310svboolx2_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca , align 2 +// CHECK-NEXT: store [[ARG:%.*]], ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[CALL:%.*]] = call @_Z4bar210svboolx2_t( [[TMP0]]) +// CHECK-NEXT: ret [[CALL]] +// +__clang_svboolx2_t foo3(__clang_svboolx2_t arg) { return bar2(arg); } + diff --git a/clang/test/CodeGen/svboolx4_t.cpp b/clang/test/CodeGen/svboolx4_t.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/svboolx4_t.cpp @@ -0,0 +1,31 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -S -emit-llvm -o - %s | FileCheck %s + +// CHECK-LABEL: @_Z3foo10svboolx4_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca , align 2 +// CHECK-NEXT: store [[ARG:%.*]], ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: ret [[TMP0]] +// +__clang_svboolx4_t foo(__clang_svboolx4_t arg) { return arg; } + +__clang_svboolx4_t bar(); +// CHECK-LABEL: @_Z4foo2v( +// CHECK-NEXT: 
entry: +// CHECK-NEXT: [[CALL:%.*]] = call @_Z3barv() +// CHECK-NEXT: ret [[CALL]] +// +__clang_svboolx4_t foo2() { return bar(); } + +__clang_svboolx4_t bar2(__clang_svboolx4_t); +// CHECK-LABEL: @_Z4foo310svboolx4_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[ARG_ADDR:%.*]] = alloca , align 2 +// CHECK-NEXT: store [[ARG:%.*]], ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load , ptr [[ARG_ADDR]], align 2 +// CHECK-NEXT: [[CALL:%.*]] = call @_Z4bar210svboolx4_t( [[TMP0]]) +// CHECK-NEXT: ret [[CALL]] +// +__clang_svboolx4_t foo3(__clang_svboolx4_t arg) { return bar2(arg); } + diff --git a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp --- a/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp +++ b/clang/test/CodeGenCXX/aarch64-mangle-sve-vectors.cpp @@ -31,6 +31,8 @@ void f12(S<__SVBFloat16_t>) {} // CHECK: _Z3f131SIu10__SVBool_tE void f13(S<__SVBool_t>) {} +// CHECK: _Z3f141SIu11__SVCount_tE +void f14(S<__SVCount_t>) {} // The tuple types don't use the internal name for mangling. 
@@ -106,3 +108,8 @@ void f47(S<__clang_svbfloat16x3_t>) {} // CHECK: _Z3f481SI14svbfloat16x4_tE void f48(S<__clang_svbfloat16x4_t>) {} +// CHECK: _Z3f491SI10svboolx2_tE +void f49(S<__clang_svboolx2_t>) {} +// CHECK: _Z3f501SI10svboolx4_tE +void f50(S<__clang_svboolx4_t>) {} + diff --git a/clang/test/CodeGenCXX/aarch64-sve-typeinfo.cpp b/clang/test/CodeGenCXX/aarch64-sve-typeinfo.cpp --- a/clang/test/CodeGenCXX/aarch64-sve-typeinfo.cpp +++ b/clang/test/CodeGenCXX/aarch64-sve-typeinfo.cpp @@ -22,6 +22,7 @@ auto &bf16 = typeid(__SVBFloat16_t); auto &b8 = typeid(__SVBool_t); +auto &c8 = typeid(__SVCount_t); // CHECK-DAG: @_ZTSu10__SVInt8_t = {{.*}} c"u10__SVInt8_t\00" // CHECK-DAG: @_ZTIu10__SVInt8_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu10__SVInt8_t @@ -61,3 +62,6 @@ // CHECK-DAG: @_ZTSu10__SVBool_t = {{.*}} c"u10__SVBool_t\00" // CHECK-DAG: @_ZTIu10__SVBool_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu10__SVBool_t + +// CHECK-DAG: @_ZTSu11__SVCount_t = {{.*}} c"u11__SVCount_t\00" +// CHECK-DAG: @_ZTIu11__SVCount_t = {{.*}} @_ZTVN10__cxxabiv123__fundamental_type_infoE, {{.*}} @_ZTSu11__SVCount_t diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -18,6 +18,7 @@ // CHECK-NEXT: AnyX86NoCfCheck (SubjectMatchRule_hasType_functionType) // CHECK-NEXT: ArcWeakrefUnavailable (SubjectMatchRule_objc_interface) // CHECK-NEXT: ArmBuiltinAlias (SubjectMatchRule_function) +// CHECK-NEXT: ArmLocallyStreaming (SubjectMatchRule_function) // CHECK-NEXT: AssumeAligned (SubjectMatchRule_objc_method, SubjectMatchRule_function) // CHECK-NEXT: Assumption (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: Availability ((SubjectMatchRule_record, SubjectMatchRule_enum, 
SubjectMatchRule_enum_constant, SubjectMatchRule_field, SubjectMatchRule_function, SubjectMatchRule_namespace, SubjectMatchRule_objc_category, SubjectMatchRule_objc_implementation, SubjectMatchRule_objc_interface, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property, SubjectMatchRule_objc_protocol, SubjectMatchRule_record, SubjectMatchRule_type_alias, SubjectMatchRule_variable)) diff --git a/clang/test/Preprocessor/aarch64-target-features.c b/clang/test/Preprocessor/aarch64-target-features.c --- a/clang/test/Preprocessor/aarch64-target-features.c +++ b/clang/test/Preprocessor/aarch64-target-features.c @@ -200,6 +200,10 @@ // RUN: %clang -target aarch64-none-linux-gnu -march=armv9-a+sve2-bitperm -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SVE2BITPERM %s // CHECK-SVE2BITPERM: __ARM_FEATURE_SVE2_BITPERM 1 +// RUN: %clang -target aarch64-none-linux-gnu -march=armv8-a+sme -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-SME-BF16 %s +// CHECK-SME-BF16: __ARM_FEATURE_BF16_SCALAR_ARITHMETIC 1 +// CHECK-SME-BF16: __ARM_FEATURE_SVE_BF16 1 + // RUN: %clang -target aarch64-none-linux-gnu -march=armv8.2a+dotprod -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-DOTPROD %s // CHECK-DOTPROD: __ARM_FEATURE_DOTPROD 1 diff --git a/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-incompat-sm-builtin-calls.c @@ -0,0 +1,124 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME=1 -triple aarch64-none-linux-gnu -target-feature +sve \ +// RUN: -target-feature +sme -target-feature +sve2 -target-feature +neon -fsyntax-only -verify %s + +// REQUIRES: aarch64-registered-target + +#include "arm_neon.h" +#include "arm_sme.h" +#include "arm_sve.h" + +__attribute__((arm_streaming)) int16x8_t incompat_neon_sm(int16x8_t splat) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return 
(int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +__attribute__((arm_locally_streaming)) int16x8_t incompat_neon_ls(int16x8_t splat) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +__attribute__((arm_streaming_compatible)) int16x8_t incompat_neon_smc(int16x8_t splat) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return (int16x8_t)__builtin_neon_vqaddq_v((int8x16_t)splat, (int8x16_t)splat, 33); +} + +void incompat_sme_norm(svbool_t pg, void const *ptr) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + return __builtin_sme_svld1_hor_za128(0, 0, 0, pg, ptr); +} + +__attribute__((arm_streaming_compatible)) void incompat_sme_smc(svbool_t pg, void const *ptr) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return __builtin_sme_svld1_hor_za128(0, 0, 0, pg, ptr); +} + +__attribute__((arm_streaming)) +svuint32_t +incompat_sve_sm(svbool_t pg, svuint32_t a, int16_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_locally_streaming)) +svuint32_t +incompat_sve_ls(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_streaming_compatible)) +svuint32_t +incompat_sve_smc(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible 
function}} + return __builtin_sve_svld1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_streaming)) +svuint32_t +incompat_sve2_sm(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_locally_streaming)) +svuint32_t +incompat_sve2_ls(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming or locally streaming function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_streaming_compatible)) +svuint32_t +incompat_sve2_smc(svbool_t pg, svuint32_t a, int64_t b) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a streaming compatible function}} + return __builtin_sve_svldnt1_gather_u32base_index_u32(pg, a, b); +} + +__attribute__((arm_shared_za)) +void +incompat_sme_sm(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + // expected-warning@+1 {{builtin call has undefined behaviour when called from a non-streaming function}} + svmops_za32_f32(0, pn, pm, zn, zm); +} + +__attribute__((arm_streaming)) svfloat64_t +streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +__attribute__((arm_locally_streaming)) svfloat64_t +locally_streaming_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +__attribute__((arm_streaming_compatible)) svfloat64_t +streaming_compatible_caller_sve(svbool_t pg, svfloat64_t a, float64_t b) { + // expected-no-warning + return svadd_n_f64_m(pg, a, b); +} + +__attribute__((arm_streaming)) svint16_t +streaming_caller_sve2(svint16_t op1, svint16_t op2) { + // expected-no-warning + return svmul_lane_s16(op1, op2, 0); +} + 
+__attribute__((arm_locally_streaming)) svint16_t +locally_streaming_caller_sve2(svint16_t op1, svint16_t op2) { + // expected-no-warning + return svmul_lane_s16(op1, op2, 0); +} + +__attribute__((arm_streaming_compatible)) svint16_t +streaming_compatible_caller_sve2(svint16_t op1, svint16_t op2) { + // expected-no-warning + return svmul_lane_s16(op1, op2, 0); +} + +__attribute__((arm_streaming)) svbool_t +streaming_caller_ptrue(void) { + // expected-no-warning + return svand_z(svptrue_b16(), svptrue_pat_b16(SV_ALL), svptrue_pat_b16(SV_VL4)); +} diff --git a/clang/test/Sema/aarch64-sme-attrs-no-sme.c b/clang/test/Sema/aarch64-sme-attrs-no-sme.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-attrs-no-sme.c @@ -0,0 +1,36 @@ +// Test that the SME attributes are ignored unless we compile for AArch64 with +sme. +// +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple x86_64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +extern int normal_callee(void); + +// expected-warning@+1 {{unknown attribute 'arm_streaming' ignored}} +__attribute__((arm_streaming)) +int streaming_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_locally_streaming' ignored}} +__attribute__((arm_locally_streaming)) +int locally_streaming_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_shared_za' ignored}} +__attribute__((arm_shared_za)) +int shared_za_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_preserves_za' ignored}} +__attribute__((arm_preserves_za)) +int preserves_za_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_new_za' ignored}} +__attribute__((arm_new_za)) +int new_za_caller(void) { + return normal_callee(); +} diff --git a/clang/test/Sema/aarch64-sme-attrs-on-x86.c b/clang/test/Sema/aarch64-sme-attrs-on-x86.c new
file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-attrs-on-x86.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple x86_64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s + +extern int normal_callee(); + +// expected-warning@+1 {{unknown attribute 'arm_streaming' ignored}} +__attribute__((arm_streaming)) +int streaming_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_streaming_compatible' ignored}} +__attribute__((arm_streaming_compatible)) +int streaming_compatible_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_locally_streaming' ignored}} +__attribute__((arm_locally_streaming)) +int locally_streaming_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_shared_za' ignored}} +__attribute__((arm_shared_za)) +int shared_za_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_preserves_za' ignored}} +__attribute__((arm_preserves_za)) +int preserves_za_caller(void) { + return normal_callee(); +} + +// expected-warning@+1 {{unknown attribute 'arm_new_za' ignored}} +__attribute__((arm_new_za)) +int new_za_caller(void) { + return normal_callee(); +} diff --git a/clang/test/Sema/aarch64-sme-func-attrs.c b/clang/test/Sema/aarch64-sme-func-attrs.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sme-func-attrs.c @@ -0,0 +1,273 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -fsyntax-only -verify=expected-cpp -x c++ %s + +// Valid attributes + +__attribute__((arm_streaming)) void sme_arm_streaming(void); +__attribute__((arm_streaming_compatible)) void sme_arm_streaming_compatible(void); + +__attribute__((arm_new_za)) void sme_arm_new_za(void); +__attribute__((arm_shared_za)) void sme_arm_shared_za(void); +__attribute__((arm_preserves_za)) 
void sme_arm_preserves_za(void); + +__attribute__((arm_streaming, arm_new_za)) void sme_arm_streaming_new_za(void); +__attribute__((arm_streaming, arm_shared_za)) void sme_arm_streaming_shared_za(void); +__attribute__((arm_streaming, arm_preserves_za)) void sme_arm_streaming_preserves_za(void); + +__attribute__((arm_streaming_compatible, arm_new_za)) void sme_arm_sc_new_za(void); +__attribute__((arm_streaming_compatible, arm_shared_za)) void sme_arm_sc_shared_za(void); +__attribute__((arm_streaming_compatible, arm_preserves_za)) void sme_arm_sc_preserves_za(void); + +__attribute__((arm_shared_za, arm_preserves_za)) void sme_arm_shared_preserves_za(void); + +__attribute__((arm_locally_streaming)) void sme_arm_locally_streaming(void) { } +__attribute__((arm_locally_streaming, arm_streaming)) void sme_arm_streaming_and_locally_streaming(void) { } +__attribute__((arm_locally_streaming, arm_streaming_compatible)) void sme_arm_streaming_and_streaming_compatible(void) { } + +__attribute__((arm_locally_streaming, arm_new_za)) void sme_arm_ls_new_za(void) { } +__attribute__((arm_locally_streaming, arm_shared_za)) void sme_arm_ls_shared_za(void) { } +__attribute__((arm_locally_streaming, arm_preserves_za)) void sme_arm_ls_preserves_za(void) { } + +// Valid attributes on function pointers + +void __attribute__((arm_streaming)) streaming_ptr(void); +typedef __attribute__((arm_streaming)) void (*fptrty1) (void); +fptrty1 call_streaming_func() { return streaming_ptr; } + +void __attribute__((arm_streaming_compatible)) streaming_compatible_ptr(void); +typedef __attribute__((arm_streaming_compatible)) void (*fptrty2) (void); +fptrty2 call_sc_func() { return streaming_compatible_ptr; } + +void __attribute__((arm_new_za)) new_za_ptr(void); +typedef __attribute__((arm_new_za)) void (*fptrty3) (void); +fptrty3 call_new_za_func() { return new_za_ptr; } + +void __attribute__((arm_shared_za)) shared_za_ptr(void); +typedef __attribute__((arm_shared_za)) void (*fptrty4) (void); +fptrty4 
call_shared_za_func() { return shared_za_ptr; } + +void __attribute__((arm_preserves_za)) preserves_za_ptr(void); +typedef __attribute__((arm_preserves_za)) void (*fptrty5) (void); +fptrty5 call_preserve_za_func() { return preserves_za_ptr; } + +void __attribute__((arm_shared_za, arm_preserves_za)) shared_preserves_za_ptr(void); +typedef __attribute__((arm_shared_za, arm_preserves_za)) void (*fptrty6) (void); +fptrty6 call_shared_preserve_za_func() { return shared_preserves_za_ptr; } + +typedef void (*fptrty7) (void); +fptrty7 cast_ls_func_to_normal() { return sme_arm_locally_streaming; } + +// FIXME: Add invalid function pointer assignments such as assigning: +// 1. A streaming compatible function to a normal function pointer, +// 2. A locally streaming function to a streaming function pointer, +// etc. + +// Invalid attributes + +// expected-cpp-error@+4 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_streaming, arm_streaming_compatible)) void streaming_mode(void); + +// expected-cpp-error@+4 {{'arm_streaming' and 'arm_streaming_compatible' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_streaming' and 'arm_streaming_compatible' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_streaming_compatible, arm_streaming)) void streaming_compatible(void); + +// expected-cpp-error@+4 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_new_za, 
arm_shared_za)) void new_shared_za(void); + +// expected-cpp-error@+4 {{'arm_new_za' and 'arm_shared_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_new_za' and 'arm_shared_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_shared_za, arm_new_za)) void shared_new_za(void); + +// expected-cpp-error@+4 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_new_za, arm_preserves_za)) void new_preserves_za(void); + +// expected-cpp-error@+4 {{'arm_new_za' and 'arm_preserves_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_new_za' and 'arm_preserves_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +__attribute__((arm_preserves_za, arm_new_za)) void preserves_new_za(void); + +// Invalid attributes on function pointers + +// expected-cpp-error@+4 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((arm_streaming, arm_streaming_compatible)) streaming_ptr_invalid(void); +// expected-cpp-error@+4 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_streaming_compatible' and 'arm_streaming' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +typedef __attribute__((arm_streaming, 
arm_streaming_compatible)) void (*fptrty8) (void); +fptrty8 invalid_streaming_func() { return streaming_ptr_invalid; } + +// expected-cpp-error@+4 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((arm_new_za, arm_shared_za)) shared_za_ptr_invalid(void); +// expected-cpp-error@+4 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_shared_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +typedef __attribute__((arm_new_za, arm_shared_za)) void (*fptrty9) (void); +fptrty9 invalid_shared_za_func() { return shared_za_ptr_invalid; } + +// expected-cpp-error@+4 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +void __attribute__((arm_new_za, arm_preserves_za)) preserves_za_ptr_invalid(void); +// expected-cpp-error@+4 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-cpp-note@+3 {{conflicting attribute is here}} +// expected-error@+2 {{'arm_preserves_za' and 'arm_new_za' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +typedef __attribute__((arm_new_za, arm_preserves_za)) void (*fptrty10) (void); +fptrty10 invalid_preserve_za_func() { return preserves_za_ptr_invalid; } + +// expected-cpp-error@+2 {{'arm_locally_streaming' attribute only applies to functions}} +// expected-error@+1 {{'arm_locally_streaming' attribute only applies to functions}} +typedef 
__attribute__((arm_locally_streaming)) void (*fptrty11) (void); + +// expected-warning@+2 {{'arm_streaming' attribute ignored}} +// expected-warning@+1 {{'arm_streaming' only applies to function types; type here is 'void ()'}} +__attribute__((arm_streaming)) void function_no_prototype(); + +// +// Check for incorrect conversions of function pointers with the attributes +// + +typedef void (*n_ptrty) (void); +typedef __attribute__((arm_streaming)) void (*s_ptrty) (void); +s_ptrty return_valid_streaming_fptr(s_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 's_ptrty' (aka 'void (*)() __attribute__((arm_streaming))') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 's_ptrty' (aka 'void (*)(void) __attribute__((arm_streaming))')}} +s_ptrty return_invalid_fptr_streaming_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 's_ptrty' (aka 'void (*)() __attribute__((arm_streaming))')}} +// expected-error@+1 {{incompatible function pointer types returning 's_ptrty' (aka 'void (*)(void) __attribute__((arm_streaming))') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_streaming(s_ptrty f) { return f; } + +typedef __attribute__((arm_streaming_compatible)) void (*sc_ptrty) (void); +sc_ptrty return_valid_streaming_compatible_fptr(sc_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 'sc_ptrty' (aka 'void (*)() __attribute__((arm_streaming_compatible))') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sc_ptrty' (aka 'void (*)(void) 
__attribute__((arm_streaming_compatible))')}} +sc_ptrty return_invalid_fptr_streaming_compatible_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sc_ptrty' (aka 'void (*)() __attribute__((arm_streaming_compatible))')}} +// expected-error@+1 {{incompatible function pointer types returning 'sc_ptrty' (aka 'void (*)(void) __attribute__((arm_streaming_compatible))') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_streaming_compatible(sc_ptrty f) { return f; } + +typedef __attribute__((arm_new_za)) void (*nz_ptrty) (void); +nz_ptrty return_valid_new_za_fptr(nz_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 'nz_ptrty' (aka 'void (*)() __attribute__((arm_new_za))') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'nz_ptrty' (aka 'void (*)(void) __attribute__((arm_new_za))')}} +nz_ptrty return_invalid_fptr_new_za_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'nz_ptrty' (aka 'void (*)() __attribute__((arm_new_za))')}} +// expected-error@+1 {{incompatible function pointer types returning 'nz_ptrty' (aka 'void (*)(void) __attribute__((arm_new_za))') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_new_za(nz_ptrty f) { return f; } + +typedef __attribute__((arm_shared_za)) void (*sz_ptrty) (void); +sz_ptrty return_valid_shared_za_fptr(sz_ptrty f) { return f; } + + +// expected-cpp-error@+2 {{cannot initialize return object of type 'sz_ptrty' (aka 'void (*)() __attribute__((arm_shared_za))') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible 
function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'sz_ptrty' (aka 'void (*)(void) __attribute__((arm_shared_za))')}} +sz_ptrty return_invalid_fptr_shared_za_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'sz_ptrty' (aka 'void (*)() __attribute__((arm_shared_za))')}} +// expected-error@+1 {{incompatible function pointer types returning 'sz_ptrty' (aka 'void (*)(void) __attribute__((arm_shared_za))') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_shared_za(sz_ptrty f) { return f; } + +typedef __attribute__((arm_preserves_za)) void (*pz_ptrty) (void); +pz_ptrty return_valid_preserves_za_fptr(pz_ptrty f) { return f; } + +// expected-cpp-error@+2 {{cannot initialize return object of type 'pz_ptrty' (aka 'void (*)() __attribute__((arm_preserves_za))') with an lvalue of type 'n_ptrty' (aka 'void (*)()')}} +// expected-error@+1 {{incompatible function pointer types returning 'n_ptrty' (aka 'void (*)(void)') from a function with result type 'pz_ptrty' (aka 'void (*)(void) __attribute__((arm_preserves_za))')}} +pz_ptrty return_invalid_fptr_preserves_za_normal(n_ptrty f) { return f; } +// expected-cpp-error@+2 {{cannot initialize return object of type 'n_ptrty' (aka 'void (*)()') with an lvalue of type 'pz_ptrty' (aka 'void (*)() __attribute__((arm_preserves_za))')}} +// expected-error@+1 {{incompatible function pointer types returning 'pz_ptrty' (aka 'void (*)(void) __attribute__((arm_preserves_za))') from a function with result type 'n_ptrty' (aka 'void (*)(void)')}} +n_ptrty return_invalid_fptr_normal_preserves_za(pz_ptrty f) { return f; } + +// Test template instantiations +#ifdef __cplusplus +template __attribute__((arm_streaming)) T templated(T x) { return x; } +template <> __attribute__((arm_streaming)) int templated(int x) { return x + 1; } +template <> 
__attribute__((arm_streaming)) float templated(float x) { return x + 2; } +// expected-cpp-error@+2 {{explicit instantiation of 'templated' does not refer to a function template, variable template, member function, member class, or static data member}} +// expected-cpp-note@-4 {{candidate template ignored: could not match 'short (short) __attribute__((arm_streaming))' against 'short (short)'}} +template short templated(short); +#endif + +// Conflicting attributes on redeclarations + +// expected-error@+5 {{function declared ''void (void) __attribute__((arm_streaming_compatible))'' was previously declared ''void (void) __attribute__((arm_streaming))'' with different SME function attributes}} +// expected-note@+3 {{previous declaration is here}} +// expected-cpp-error@+3 {{function declared ''void () __attribute__((arm_streaming_compatible))'' was previously declared ''void () __attribute__((arm_streaming))'' with different SME function attributes}} +// expected-cpp-note@+1 {{previous declaration is here}} +__attribute__((arm_streaming)) void redecl(void); +__attribute__((arm_streaming_compatible)) void redecl(void) { } + +// expected-error@+5 {{function declared ''void (void) __attribute__((arm_shared_za))'' was previously declared ''void (void) __attribute__((arm_shared_za)) __attribute__((arm_preserves_za))'' with different SME function attributes}} +// expected-note@+3 {{previous declaration is here}} +// expected-cpp-error@+3 {{function declared ''void () __attribute__((arm_shared_za))'' was previously declared ''void () __attribute__((arm_shared_za)) __attribute__((arm_preserves_za))'' with different SME function attributes}} +// expected-cpp-note@+1 {{previous declaration is here}} +__attribute__((arm_shared_za, arm_preserves_za)) void redecl_nopreserve_za(void); +__attribute__((arm_shared_za)) void redecl_nopreserve_za(void) { } + +#ifdef __cplusplus +struct S { + virtual __attribute__((arm_shared_za)) void shared_za_memberfn(void); +}; + +struct S2 : public 
S { +// expected-cpp-error@+2 {{virtual function 'shared_za_memberfn' has different attributes ('void () __attribute__((arm_new_za))') than the function it overrides (which has 'void () __attribute__((arm_shared_za))')}} +// expected-cpp-note@-5 {{overridden virtual function is here}} + __attribute__((arm_new_za)) void shared_za_memberfn(void) override; +}; + +// Check that the attribute propagates through template instantiations. +template +struct S3 { + static constexpr int value = 0; +}; + +template <> +struct S3 { + static constexpr int value = 1; +}; + +template <> +struct S3 { + static constexpr int value = 2; +}; + +void normal_func() {} +void streaming_func() __attribute__((arm_streaming)) {} + +static_assert(S3::value == 1, "why are we picking the wrong specialization?"); +static_assert(S3::value == 2, "why are we picking the wrong specialization?"); +#endif + +// expected-cpp-error@+2 {{'arm_streaming' attribute takes no arguments}} +// expected-error@+1 {{'arm_streaming' attribute takes no arguments}} +__attribute__((arm_streaming(0))) void invalid_streaming_args(void); + +// expected-cpp-error@+4 {{attribute only applies to non-K&R-style functions}} +// expected-cpp-warning@+3 {{'arm_streaming' only applies to function types; type here is 'int'}} +// expected-error@+2 {{attribute only applies to non-K&R-style functions}} +// expected-warning@+1 {{'arm_streaming' only applies to function types; type here is 'int'}} +__attribute__((arm_streaming)) int invalid_type_for_attribute; diff --git a/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sme2_imm.cpp b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sme2_imm.cpp new file mode 100644 --- /dev/null +++ b/clang/test/Sema/aarch64-sve2p1-intrinsics/acle_sme2_imm.cpp @@ -0,0 +1,24 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 -triple aarch64-none-linux-gnu -target-feature +sme2 -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -D__ARM_FEATURE_SME2 
-DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme2 -fsyntax-only -verify -verify-ignore-unexpected=error %s + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4 +#endif + +#include + +__attribute__((arm_streaming)) +void test_svpext_lane_imm_0_3(svcount_t c) { + svpext_lane_c8(c, -1); // expected-error @ {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c16(c, -1); // expected-error @ {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c32(c, -1); // expected-error @ {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + svpext_lane_c64(c, -1); // expected-error @ {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + + svpext_lane_c8(c, 4); // expected-error @+1 {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c16(c, 4); // expected-error @+1 {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c32(c, 4); // expected-error @+1 {{argument value 4 is outside the valid range [0, 3]}} + svpext_lane_c64(c, 4); // expected-error @+1 {{argument value 4 is outside the valid range [0, 3]}} +} diff --git a/clang/unittests/AST/SizelessTypesTest.cpp b/clang/unittests/AST/SizelessTypesTest.cpp --- a/clang/unittests/AST/SizelessTypesTest.cpp +++ b/clang/unittests/AST/SizelessTypesTest.cpp @@ -45,6 +45,7 @@ ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessBuiltinType()); ASSERT_TRUE(Ctx.SveBoolTy->isSizelessBuiltinType()); + ASSERT_TRUE(Ctx.SveCountTy->isSizelessBuiltinType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessBuiltinType()); ASSERT_FALSE(Ctx.PseudoObjectTy->isSizelessBuiltinType()); @@ -55,6 +56,12 @@ Ctx.getLValueReferenceType(Ctx.SveBoolTy)->isSizelessBuiltinType()); ASSERT_FALSE( 
Ctx.getRValueReferenceType(Ctx.SveBoolTy)->isSizelessBuiltinType()); + + ASSERT_FALSE(Ctx.getPointerType(Ctx.SveCountTy)->isSizelessBuiltinType()); + ASSERT_FALSE( + Ctx.getLValueReferenceType(Ctx.SveCountTy)->isSizelessBuiltinType()); + ASSERT_FALSE( + Ctx.getRValueReferenceType(Ctx.SveCountTy)->isSizelessBuiltinType()); } TEST_F(SizelessTypeTester, TestSizeless) { @@ -75,6 +82,7 @@ ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessType()); ASSERT_TRUE(Ctx.SveBoolTy->isSizelessType()); + ASSERT_TRUE(Ctx.SveCountTy->isSizelessType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessType()); ASSERT_FALSE(Ctx.PseudoObjectTy->isSizelessType()); @@ -83,4 +91,8 @@ ASSERT_FALSE(Ctx.getPointerType(Ctx.SveBoolTy)->isSizelessType()); ASSERT_FALSE(Ctx.getLValueReferenceType(Ctx.SveBoolTy)->isSizelessType()); ASSERT_FALSE(Ctx.getRValueReferenceType(Ctx.SveBoolTy)->isSizelessType()); + + ASSERT_FALSE(Ctx.getPointerType(Ctx.SveCountTy)->isSizelessType()); + ASSERT_FALSE(Ctx.getLValueReferenceType(Ctx.SveCountTy)->isSizelessType()); + ASSERT_FALSE(Ctx.getRValueReferenceType(Ctx.SveCountTy)->isSizelessType()); } diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -553,6 +553,8 @@ void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl &Defs); void genOverloadTypeCheckCode(raw_ostream &OS, SmallVectorImpl &Defs); + void genStreamingSVECompatibleList(raw_ostream &OS, + SmallVectorImpl &Defs); void genIntrinsicRangeCheckCode(raw_ostream &OS, SmallVectorImpl &Defs); @@ -2040,6 +2042,30 @@ OS << "#endif\n\n"; } +void NeonEmitter::genStreamingSVECompatibleList( + raw_ostream &OS, SmallVectorImpl &Defs) { + OS << "#ifdef GET_NEON_STREAMING_COMPAT_FLAG\n"; + + std::set Emitted; + for (auto *Def : Defs) { + // If the def has a body (that is, it has Operation DAGs), it won't call + // __builtin_neon_* so we don't need to generate a definition for it. 
+ if (Def->hasBody()) + continue; + + std::string Name = Def->getMangledName(); + if (Emitted.find(Name) != Emitted.end()) + continue; + + // FIXME: We should make exceptions here for some NEON builtins that are + // permitted in streaming mode. + OS << "case NEON::BI__builtin_neon_" << Name + << ": BuiltinType = ArmNonStreaming; break;\n"; + Emitted.insert(Name); + } + OS << "#endif\n\n"; +} + /// Generate the ARM and AArch64 overloaded type checking code for /// SemaChecking.cpp, checking for unique builtin declarations. void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS, @@ -2222,6 +2248,8 @@ // Generate ARM overloaded type checking code for SemaChecking.cpp genOverloadTypeCheckCode(OS, Defs); + genStreamingSVECompatibleList(OS, Defs); + // Generate ARM range checking code for shift/lane immediates. genIntrinsicRangeCheckCode(OS, Defs); } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -43,6 +43,8 @@ ClassG, // Overloaded name without type suffix }; +enum class ACLEKind { SVE, SME }; + using TypeSpec = std::string; namespace { @@ -66,7 +68,8 @@ class SVEType { TypeSpec TS; bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat; - bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp; + bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp, + Svcount; unsigned Bitwidth, ElementBitwidth, NumVectors; public: @@ -76,7 +79,8 @@ : TS(TS), Float(false), Signed(true), Immediate(false), Void(false), Constant(false), Pointer(false), BFloat(false), DefaultType(false), IsScalable(true), Predicate(false), PredicatePattern(false), - PrefetchOp(false), Bitwidth(128), ElementBitwidth(~0U), NumVectors(1) { + PrefetchOp(false), Svcount(false), Bitwidth(128), + ElementBitwidth(~0U), NumVectors(1) { if (!TS.empty()) applyTypespec(); applyModifier(CharMod); @@ -95,13 +99,14 @@ bool isFloat() const { return Float 
&& !BFloat; } bool isBFloat() const { return BFloat && !Float; } bool isFloatingPoint() const { return Float || BFloat; } - bool isInteger() const { return !isFloatingPoint() && !Predicate; } + bool isInteger() const { return !isFloatingPoint() && !Predicate && !Svcount; } bool isScalarPredicate() const { return !isFloatingPoint() && Predicate && NumVectors == 0; } bool isPredicateVector() const { return Predicate; } bool isPredicatePattern() const { return PredicatePattern; } bool isPrefetchOp() const { return PrefetchOp; } + bool isSvcount() const { return Svcount; } bool isConstant() const { return Constant; } unsigned getElementSizeInBits() const { return ElementBitwidth; } unsigned getNumVectors() const { return NumVectors; } @@ -167,13 +172,16 @@ uint64_t Flags; + std::string Attrs; + SmallVector ImmChecks; public: Intrinsic(StringRef Name, StringRef Proto, uint64_t MergeTy, StringRef MergeSuffix, uint64_t MemoryElementTy, StringRef LLVMName, uint64_t Flags, ArrayRef ImmChecks, TypeSpec BT, - ClassKind Class, SVEEmitter &Emitter, StringRef Guard); + ClassKind Class, SVEEmitter &Emitter, StringRef Guard, + std::string Attrs); ~Intrinsic()=default; @@ -228,7 +236,7 @@ } /// Emits the intrinsic declaration to the ostream. - void emitIntrinsic(raw_ostream &OS) const; + void emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const; private: std::string getMergeSuffix() const { return MergeSuffix; } @@ -334,18 +342,29 @@ /// Emit arm_sve.h. void createHeader(raw_ostream &o); + // Emits core intrinsics in both arm_sme.h and arm_sve.h + void createCoreHeaderIntrinsics(raw_ostream &o, ACLEKind Kind); + /// Emit all the __builtin prototypes and code needed by Sema. - void createBuiltins(raw_ostream &o); + void createBuiltins(raw_ostream &o, ACLEKind Kind); /// Emit all the information needed to map builtin -> LLVM IR intrinsic. - void createCodeGenMap(raw_ostream &o); + void createCodeGenMap(raw_ostream &o, ACLEKind Kind); /// Emit all the range checks for the immediates. 
- void createRangeChecks(raw_ostream &o); + void createRangeChecks(raw_ostream &o, ACLEKind Kind); /// Create the SVETypeFlags used in CGBuiltins void createTypeFlags(raw_ostream &o); + /// Emit arm_sme.h. + void createSmeHeader(raw_ostream &o); + + /// Create the SMETypeFlags used in CGBuiltins + void createSmeTypeFlags(raw_ostream &o); + + void createStreamingAttrs(raw_ostream &o, ACLEKind Kind); + /// Create intrinsic and add it to \p Out void createIntrinsic(Record *R, SmallVectorImpl> &Out); }; @@ -365,6 +384,9 @@ if (isScalarPredicate()) return "b"; + if (isSvcount()) + return "Q"; + if (isVoidPointer()) S += "v"; else if (!isFloatingPoint()) @@ -425,16 +447,19 @@ return "enum svprfop"; std::string S; + if (Void) S += "void"; else { - if (isScalableVector()) + if (isScalableVector() || isSvcount()) S += "sv"; if (!Signed && !isFloatingPoint()) S += "u"; if (Float) S += "float"; + else if (isSvcount()) + S += "count"; else if (isScalarPredicate() || isPredicateVector()) S += "bool"; else if (isBFloat()) @@ -442,7 +467,7 @@ else S += "int"; - if (!isScalarPredicate() && !isPredicateVector()) + if (!isScalarPredicate() && !isPredicateVector() && !isSvcount()) S += utostr(ElementBitwidth); if (!isScalableVector() && isVector()) S += "x" + utostr(getNumElements()); @@ -462,6 +487,9 @@ void SVEType::applyTypespec() { for (char I : TS) { switch (I) { + case 'Q': + Svcount = true; + break; case 'P': Predicate = true; break; @@ -553,6 +581,7 @@ Float = false; BFloat = false; Predicate = true; + Svcount = false; Bitwidth = 16; ElementBitwidth = 1; break; @@ -592,18 +621,21 @@ break; case 'u': Predicate = false; + Svcount = false; Signed = false; Float = false; BFloat = false; break; case 'x': Predicate = false; + Svcount = false; Signed = true; Float = false; BFloat = false; break; case 'i': Predicate = false; + Svcount = false; Float = false; BFloat = false; ElementBitwidth = Bitwidth = 64; @@ -613,6 +645,7 @@ break; case 'I': Predicate = false; + Svcount = false; 
Float = false; BFloat = false; ElementBitwidth = Bitwidth = 32; @@ -623,6 +656,7 @@ break; case 'J': Predicate = false; + Svcount = false; Float = false; BFloat = false; ElementBitwidth = Bitwidth = 32; @@ -633,6 +667,7 @@ break; case 'k': Predicate = false; + Svcount = false; Signed = true; Float = false; BFloat = false; @@ -641,6 +676,7 @@ break; case 'l': Predicate = false; + Svcount = false; Signed = true; Float = false; BFloat = false; @@ -649,6 +685,7 @@ break; case 'm': Predicate = false; + Svcount = false; Signed = false; Float = false; BFloat = false; @@ -657,6 +694,7 @@ break; case 'n': Predicate = false; + Svcount = false; Signed = false; Float = false; BFloat = false; @@ -695,17 +733,20 @@ break; case 'O': Predicate = false; + Svcount = false; Float = true; ElementBitwidth = 16; break; case 'M': Predicate = false; + Svcount = false; Float = true; BFloat = false; ElementBitwidth = 32; break; case 'N': Predicate = false; + Svcount = false; Float = true; ElementBitwidth = 64; break; @@ -799,6 +840,26 @@ NumVectors = 0; Signed = false; break; + case '{': + Pointer = true; + Void = true; + NumVectors = 0; + break; + case '}': + Predicate = false; + Signed = true; + Svcount = true; + NumVectors = 0; + Float = false; + BFloat = false; + break; + case 'y': + Predicate = true; + Svcount = false; + NumVectors = 0; + Float = false; + BFloat = false; + break; default: llvm_unreachable("Unhandled character!"); } @@ -813,11 +874,11 @@ StringRef MergeSuffix, uint64_t MemoryElementTy, StringRef LLVMName, uint64_t Flags, ArrayRef Checks, TypeSpec BT, ClassKind Class, - SVEEmitter &Emitter, StringRef Guard) + SVEEmitter &Emitter, StringRef Guard, std::string Attrs) : Name(Name.str()), LLVMName(LLVMName), Proto(Proto.str()), BaseTypeSpec(BT), Class(Class), Guard(Guard.str()), MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), - ImmChecks(Checks.begin(), Checks.end()) { + Attrs(Attrs), ImmChecks(Checks.begin(), Checks.end()) { // Types[0] is the return value. 
for (unsigned I = 0; I < Proto.size(); ++I) { SVEType T(BaseTypeSpec, Proto[I]); @@ -879,6 +940,8 @@ std::string TypeCode; if (T.isInteger()) TypeCode = T.isSigned() ? 's' : 'u'; + else if (T.isSvcount()) + TypeCode = 'c'; else if (T.isPredicateVector()) TypeCode = 'b'; else if (T.isBFloat()) @@ -918,15 +981,23 @@ getMergeSuffix(); } -void Intrinsic::emitIntrinsic(raw_ostream &OS) const { +void Intrinsic::emitIntrinsic(raw_ostream &OS, ACLEKind Kind) const { bool IsOverloaded = getClassKind() == ClassG && getProto().size() > 1; std::string FullName = mangleName(ClassS); std::string ProtoName = mangleName(getClassKind()); - OS << (IsOverloaded ? "__aio " : "__ai ") - << "__attribute__((__clang_arm_builtin_alias(" - << "__builtin_sve_" << FullName << ")))\n"; + OS << (IsOverloaded ? "__aio " : "__ai ") << Attrs << '\n' + << "__attribute__((__clang_arm_builtin_alias("; + + switch (Kind) { + case ACLEKind::SME: + OS << "__builtin_sme_" << FullName << ")))\n"; + break; + case ACLEKind::SVE: + OS << "__builtin_sve_" << FullName << ")))\n"; + break; + } OS << getTypes()[0].str() << " " << ProtoName << "("; for (unsigned I = 0; I < getTypes().size() - 1; ++I) { @@ -959,7 +1030,7 @@ return encodeEltType("EltTyBFloat16"); } - if (T.isPredicateVector()) { + if (T.isPredicateVector() || T.isSvcount()) { switch (T.getElementSizeInBits()) { case 8: return encodeEltType("EltTyBool8"); @@ -1012,6 +1083,29 @@ Types = "i"; } + // Create attributes + bool IsStreaming = getEnumValueForFlag("IsStreaming") & Flags; + bool IsStreamingCompatible = + getEnumValueForFlag("IsStreamingCompatible") & Flags; + bool IsSharedZA = getEnumValueForFlag("IsSharedZA") & Flags; + bool IsPreservedZA = getEnumValueForFlag("IsPreservedZA") & Flags; + std::string Attrs = ""; + if (IsStreaming) + Attrs = "__as "; + + if (IsStreamingCompatible) + Attrs += "__asc "; + + if (IsSharedZA) + Attrs += "__asza "; + + if (IsPreservedZA) + Attrs += "__apza "; + + // Kill the final space. 
+ if (uint64_t AttrLen = Attrs.size()) + Attrs.resize(AttrLen - 1); + // Extract type specs from string SmallVector TypeSpecs; TypeSpec Acc; @@ -1048,14 +1142,54 @@ Out.push_back(std::make_unique( Name, Proto, Merge, MergeSuffix, MemEltType, LLVMName, Flags, ImmChecks, - TS, ClassS, *this, Guard)); + TS, ClassS, *this, Guard, Attrs)); // Also generate the short-form (e.g. svadd_m) for the given type-spec. if (Intrinsic::isOverloadedIntrinsic(Name)) Out.push_back(std::make_unique( Name, Proto, Merge, MergeSuffix, MemEltType, LLVMName, Flags, - ImmChecks, TS, ClassG, *this, Guard)); + ImmChecks, TS, ClassG, *this, Guard, Attrs)); + } +} + +void SVEEmitter::createCoreHeaderIntrinsics(raw_ostream &OS, ACLEKind Kind) { + SmallVector, 128> Defs; + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + for (auto *R : RV) + createIntrinsic(R, Defs); + + // Sort intrinsics in header file by following order/priority: + // - Architectural guard (i.e. does it require SVE2 or SVE2_AES) + // - Class (is intrinsic overloaded or not) + // - Intrinsic name + std::stable_sort(Defs.begin(), Defs.end(), + [](const std::unique_ptr &A, + const std::unique_ptr &B) { + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), + (unsigned)I->getClassKind(), + I->getName()); + }; + return ToTuple(A) < ToTuple(B); + }); + + StringRef InGuard = ""; + for (auto &I : Defs) { + // Emit #endif/#if pair if needed. + if (I->getGuard() != InGuard) { + if (!InGuard.empty()) + OS << "#endif //" << InGuard << "\n"; + InGuard = I->getGuard(); + if (!InGuard.empty()) + OS << "\n#if " << InGuard << "\n"; + } + + // Actually emit the intrinsic declaration. 
+ I->emitIntrinsic(OS, Kind); } + + if (!InGuard.empty()) + OS << "#endif //" << InGuard << "\n"; } void SVEEmitter::createHeader(raw_ostream &OS) { @@ -1154,7 +1288,9 @@ OS << "typedef __clang_svfloat16x4_t svfloat16x4_t;\n"; OS << "typedef __clang_svfloat32x4_t svfloat32x4_t;\n"; OS << "typedef __clang_svfloat64x4_t svfloat64x4_t;\n"; - OS << "typedef __SVBool_t svbool_t;\n\n"; + OS << "typedef __SVBool_t svbool_t;\n"; + OS << "typedef __clang_svboolx2_t svboolx2_t;\n"; + OS << "typedef __clang_svboolx4_t svboolx4_t;\n\n"; OS << "#ifdef __ARM_FEATURE_SVE_BF16\n"; OS << "typedef __clang_svbfloat16x2_t svbfloat16x2_t;\n"; @@ -1162,6 +1298,10 @@ OS << "typedef __clang_svbfloat16x4_t svbfloat16x4_t;\n"; OS << "#endif\n"; + OS << "#ifdef __ARM_FEATURE_SME2\n"; + OS << "typedef __SVCount_t svcount_t;\n\n"; + OS << "#endif\n"; + OS << "enum svpattern\n"; OS << "{\n"; OS << " SV_POW2 = 0,\n"; @@ -1204,6 +1344,15 @@ "__nodebug__))\n\n"; OS << "#define __aio static __inline__ __attribute__((__always_inline__, " "__nodebug__, __overloadable__))\n\n"; + OS << "#ifdef __ARM_FEATURE_SME\n"; + OS << "#define __asc __attribute__((arm_streaming_compatible))\n"; + OS << "#else\n"; + OS << "#define __asc\n"; + OS << "#endif\n\n"; + + OS << "#if defined(__ARM_FEATURE_SME2)\n"; + OS << "#define __as __attribute__((arm_streaming))\n"; + OS << "#endif\n\n"; // Add reinterpret functions. for (auto ShortForm : { false, true } ) @@ -1227,41 +1376,7 @@ OS << "#endif /* #if defined(__ARM_FEATURE_SVE_BF16) */\n"; } - SmallVector, 128> Defs; - std::vector RV = Records.getAllDerivedDefinitions("Inst"); - for (auto *R : RV) - createIntrinsic(R, Defs); - - // Sort intrinsics in header file by following order/priority: - // - Architectural guard (i.e. 
does it require SVE2 or SVE2_AES) - // - Class (is intrinsic overloaded or not) - // - Intrinsic name - std::stable_sort( - Defs.begin(), Defs.end(), [](const std::unique_ptr &A, - const std::unique_ptr &B) { - auto ToTuple = [](const std::unique_ptr &I) { - return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); - }; - return ToTuple(A) < ToTuple(B); - }); - - StringRef InGuard = ""; - for (auto &I : Defs) { - // Emit #endif/#if pair if needed. - if (I->getGuard() != InGuard) { - if (!InGuard.empty()) - OS << "#endif //" << InGuard << "\n"; - InGuard = I->getGuard(); - if (!InGuard.empty()) - OS << "\n#if " << InGuard << "\n"; - } - - // Actually emit the intrinsic declaration. - I->emitIntrinsic(OS); - } - - if (!InGuard.empty()) - OS << "#endif //" << InGuard << "\n"; + createCoreHeaderIntrinsics(OS, ACLEKind::SVE); OS << "#if defined(__ARM_FEATURE_SVE_BF16)\n"; OS << "#define svcvtnt_bf16_x svcvtnt_bf16_m\n"; @@ -1288,7 +1403,7 @@ OS << "#endif /* __ARM_SVE_H */\n"; } -void SVEEmitter::createBuiltins(raw_ostream &OS) { +void SVEEmitter::createBuiltins(raw_ostream &OS, ACLEKind Kind) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) @@ -1300,13 +1415,35 @@ return A->getMangledName() < B->getMangledName(); }); - OS << "#ifdef GET_SVE_BUILTINS\n"; + switch (Kind) { + case ACLEKind::SME: + OS << "#ifdef GET_SME_BUILTINS\n"; + break; + case ACLEKind::SVE: + OS << "#ifdef GET_SVE_BUILTINS\n"; + break; + } + for (auto &Def : Defs) { // Only create BUILTINs for non-overloaded intrinsics, as overloaded // declarations only live in the header file. 
- if (Def->getClassKind() != ClassG) - OS << "BUILTIN(__builtin_sve_" << Def->getMangledName() << ", \"" - << Def->getBuiltinTypeStr() << "\", \"n\")\n"; + if (Def->getClassKind() != ClassG) { + switch (Kind) { + case ACLEKind::SME: + OS << "BUILTIN(__builtin_sme_"; + break; + case ACLEKind::SVE: + OS << "BUILTIN(__builtin_sve_"; + } + + OS << Def->getMangledName() << ", \"" << Def->getBuiltinTypeStr() + << "\", \"n\")\n"; + } + } + + if (Kind == ACLEKind::SME) { + OS << "#endif\n\n"; + return; } // Add reinterpret builtins @@ -1317,9 +1454,9 @@ << "\", \"n\")\n"; OS << "#endif\n\n"; - } +} -void SVEEmitter::createCodeGenMap(raw_ostream &OS) { +void SVEEmitter::createCodeGenMap(raw_ostream &OS, ACLEKind Kind) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) @@ -1331,7 +1468,15 @@ return A->getMangledName() < B->getMangledName(); }); - OS << "#ifdef GET_SVE_LLVM_INTRINSIC_MAP\n"; + switch (Kind) { + case ACLEKind::SME: + OS << "#ifdef GET_SME_LLVM_INTRINSIC_MAP\n"; + break; + case ACLEKind::SVE: + OS << "#ifdef GET_SVE_LLVM_INTRINSIC_MAP\n"; + break; + } + for (auto &Def : Defs) { // Builtins only exist for non-overloaded intrinsics, overloaded // declarations only live in the header file. 
@@ -1343,16 +1488,26 @@ std::string LLVMName = Def->getLLVMName(); std::string Builtin = Def->getMangledName(); - if (!LLVMName.empty()) - OS << "SVEMAP1(" << Builtin << ", " << LLVMName << ", " << FlagString - << "),\n"; - else - OS << "SVEMAP2(" << Builtin << ", " << FlagString << "),\n"; + switch (Kind) { + case ACLEKind::SME: + OS << "SME"; + break; + case ACLEKind::SVE: + OS << "SVE"; + break; + } + if (!LLVMName.empty()) { + OS << "MAP1("; + OS << Builtin << ", " << LLVMName << ", " << FlagString << "),\n"; + } else { + OS << "MAP2("; + OS << Builtin << ", " << FlagString << "),\n"; + } } OS << "#endif\n\n"; } -void SVEEmitter::createRangeChecks(raw_ostream &OS) { +void SVEEmitter::createRangeChecks(raw_ostream &OS, ACLEKind Kind) { std::vector RV = Records.getAllDerivedDefinitions("Inst"); SmallVector, 128> Defs; for (auto *R : RV) @@ -1364,8 +1519,14 @@ return A->getMangledName() < B->getMangledName(); }); - - OS << "#ifdef GET_SVE_IMMEDIATE_CHECK\n"; + switch (Kind) { + case ACLEKind::SME: + OS << "#ifdef GET_SME_IMMEDIATE_CHECK\n"; + break; + case ACLEKind::SVE: + OS << "#ifdef GET_SVE_IMMEDIATE_CHECK\n"; + break; + } // Ensure these are only emitted once. 
std::set Emitted; @@ -1375,7 +1536,15 @@ Def->getImmChecks().empty()) continue; - OS << "case SVE::BI__builtin_sve_" << Def->getMangledName() << ":\n"; + switch (Kind) { + case ACLEKind::SME: + OS << "case SME::BI__builtin_sme_"; + break; + case ACLEKind::SVE: + OS << "case SVE::BI__builtin_sve_"; + break; + } + OS << Def->getMangledName() << ":\n"; for (auto &Check : Def->getImmChecks()) OS << "ImmChecks.push_back(std::make_tuple(" << Check.getArg() << ", " << Check.getKind() << ", " << Check.getElementSizeInBits() << "));\n"; @@ -1415,25 +1584,179 @@ OS << "#endif\n\n"; } +void SVEEmitter::createSmeHeader(raw_ostream &OS) { + OS << "/*===---- arm_sme.h - ARM SME intrinsics " + "-----------------------------------===\n" + " *\n" + " *\n" + " * Part of the LLVM Project, under the Apache License v2.0 with LLVM " + "Exceptions.\n" + " * See https://llvm.org/LICENSE.txt for license information.\n" + " * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception\n" + " *\n" + " *===-----------------------------------------------------------------" + "------===\n" + " */\n\n"; + + OS << "#ifndef __ARM_SME_H\n"; + OS << "#define __ARM_SME_H\n\n"; + + OS << "#if !defined(__ARM_FEATURE_SME)\n"; + OS << "#error \"SME support not enabled\"\n"; + OS << "#else\n\n"; + + OS << "#if !defined(__LITTLE_ENDIAN__)\n"; + OS << "#error \"Big endian is currently not supported for arm_sme.h\"\n"; + OS << "#endif\n"; + + OS << "#include \n\n"; + OS << "#include \n\n"; + OS << "#ifdef __cplusplus\n"; + OS << "extern \"C\" {\n"; + OS << "#else\n"; + OS << "#include \n"; + OS << "#endif\n\n"; + + OS << "#include \"arm_sve.h\"\n\n"; + + OS << "/* Function attributes */\n"; + OS << "#define __ai static __inline__ __attribute__((__always_inline__, " + "__nodebug__))\n\n"; + OS << "#define __aio static __inline__ __attribute__((__always_inline__, " + "__nodebug__, __overloadable__))\n\n"; + OS << "#define __as __attribute__((arm_streaming))\n\n"; + OS << "#define __asc 
__attribute__((arm_streaming_compatible))\n\n"; + OS << "#define __asza __attribute__((arm_shared_za))\n\n"; + OS << "#define __apza __attribute__((arm_preserves_za))\n\n"; + + OS << "__asc void __arm_disable_za(void);\n"; + OS << "__ai __asc __asza void svundef_za(void) { }\n\n"; + + createCoreHeaderIntrinsics(OS, ACLEKind::SME); + + OS << "__ai __as __asza void svzero_za(void) { svzero_mask_za(255); }\n\n"; + + OS << "__asc __apza void *__arm_sc_memcpy(void *dest, const void *src, size_t n);\n"; + OS << "__asc __apza void *__arm_sc_memmove(void *dest, const void *src, size_t n);\n"; + OS << "__asc __apza void *__arm_sc_memset(void *s, int c, size_t n);\n"; + OS << "__asc __apza void *__arm_sc_memchr(void *s, int c, size_t n);\n\n"; + + OS << "#ifdef __cplusplus\n"; + OS << "} // extern \"C\"\n"; + OS << "#endif\n\n"; + OS << "#endif /*__ARM_FEATURE_SME */\n\n"; + OS << "#endif /* __ARM_SME_H */\n"; +} + +/// Create the SMETypeFlags used in CGBuiltins +void SVEEmitter::createSmeTypeFlags(raw_ostream &OS) { + OS << "#ifdef LLVM_GET_SME_IMMCHECKTYPES\n"; + for (auto &KV : ImmCheckTypes) + OS << " " << KV.getKey() << " = " << KV.getValue() << ",\n"; + OS << "#endif\n\n"; +} + +void SVEEmitter::createStreamingAttrs(raw_ostream &OS, ACLEKind Kind) { + std::vector RV = Records.getAllDerivedDefinitions("Inst"); + SmallVector, 128> Defs; + for (auto *R : RV) + createIntrinsic(R, Defs); + + // The mappings must be sorted based on BuiltinID. + llvm::sort(Defs, [](const std::unique_ptr &A, + const std::unique_ptr &B) { + return A->getMangledName() < B->getMangledName(); + }); + + switch (Kind) { + case ACLEKind::SME: + OS << "#ifdef GET_SME_STREAMING_ATTRS\n"; + break; + case ACLEKind::SVE: + OS << "#ifdef GET_SVE_STREAMING_ATTRS\n"; + break; + } + + // Ensure these are only emitted once. 
+ std::set Emitted; + + uint64_t IsStreamingFlag = getEnumValueForFlag("IsStreaming"); + uint64_t IsStreamingCompatibleFlag = + getEnumValueForFlag("IsStreamingCompatible"); + for (auto &Def : Defs) { + if (Emitted.find(Def->getMangledName()) != Emitted.end()) + continue; + + switch (Kind) { + case ACLEKind::SME: + OS << "case SME::BI__builtin_sme_"; + break; + case ACLEKind::SVE: + OS << "case SVE::BI__builtin_sve_"; + break; + } + OS << Def->getMangledName() << ":\n"; + + if (Def->isFlagSet(IsStreamingFlag)) + OS << " BuiltinType = ArmStreaming;\n"; + else if (Def->isFlagSet(IsStreamingCompatibleFlag)) + OS << " BuiltinType = ArmStreamingCompatible;\n"; + else + OS << " BuiltinType = ArmNonStreaming;\n"; + OS << " break;\n"; + + Emitted.insert(Def->getMangledName()); + } + + OS << "#endif\n\n"; +} + namespace clang { void EmitSveHeader(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createHeader(OS); } void EmitSveBuiltins(RecordKeeper &Records, raw_ostream &OS) { - SVEEmitter(Records).createBuiltins(OS); + SVEEmitter(Records).createBuiltins(OS, ACLEKind::SVE); } void EmitSveBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { - SVEEmitter(Records).createCodeGenMap(OS); + SVEEmitter(Records).createCodeGenMap(OS, ACLEKind::SVE); } void EmitSveRangeChecks(RecordKeeper &Records, raw_ostream &OS) { - SVEEmitter(Records).createRangeChecks(OS); + SVEEmitter(Records).createRangeChecks(OS, ACLEKind::SVE); } void EmitSveTypeFlags(RecordKeeper &Records, raw_ostream &OS) { SVEEmitter(Records).createTypeFlags(OS); } +void EmitSveStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SVE); +} + +void EmitSmeHeader(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSmeHeader(OS); +} + +void EmitSmeBuiltins(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createBuiltins(OS, ACLEKind::SME); +} + +void EmitSmeBuiltinCG(RecordKeeper &Records, raw_ostream &OS) { + 
SVEEmitter(Records).createCodeGenMap(OS, ACLEKind::SME); +} + +void EmitSmeRangeChecks(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createRangeChecks(OS, ACLEKind::SME); +} + +void EmitSmeTypeFlags(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createSmeTypeFlags(OS); +} + +void EmitSmeStreamingAttrs(RecordKeeper &Records, raw_ostream &OS) { + SVEEmitter(Records).createStreamingAttrs(OS, ACLEKind::SME); +} } // End namespace clang diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp --- a/clang/utils/TableGen/TableGen.cpp +++ b/clang/utils/TableGen/TableGen.cpp @@ -81,6 +81,13 @@ GenArmSveBuiltinCG, GenArmSveTypeFlags, GenArmSveRangeChecks, + GenArmSveStreamingAttrs, + GenArmSmeHeader, + GenArmSmeBuiltins, + GenArmSmeBuiltinCG, + GenArmSmeTypeFlags, + GenArmSmeRangeChecks, + GenArmSmeStreamingAttrs, GenArmCdeHeader, GenArmCdeBuiltinDef, GenArmCdeBuiltinSema, @@ -219,6 +226,20 @@ "Generate arm_sve_typeflags.inc for clang"), clEnumValN(GenArmSveRangeChecks, "gen-arm-sve-sema-rangechecks", "Generate arm_sve_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSveStreamingAttrs, "gen-arm-sve-streaming-attrs", + "Generate arm_sve_streaming_attrs.inc for clang"), + clEnumValN(GenArmSmeHeader, "gen-arm-sme-header", + "Generate arm_sme.h for clang"), + clEnumValN(GenArmSmeBuiltins, "gen-arm-sme-builtins", + "Generate arm_sme_builtins.inc for clang"), + clEnumValN(GenArmSmeBuiltinCG, "gen-arm-sme-builtin-codegen", + "Generate arm_sme_builtin_cg_map.inc for clang"), + clEnumValN(GenArmSmeTypeFlags, "gen-arm-sme-typeflags", + "Generate arm_sme_typeflags.inc for clang"), + clEnumValN(GenArmSmeRangeChecks, "gen-arm-sme-sema-rangechecks", + "Generate arm_sme_sema_rangechecks.inc for clang"), + clEnumValN(GenArmSmeStreamingAttrs, "gen-arm-sme-streaming-attrs", + "Generate arm_sme_streaming_attrs.inc for clang"), clEnumValN(GenArmMveHeader, "gen-arm-mve-header", "Generate arm_mve.h for clang"), 
clEnumValN(GenArmMveBuiltinDef, "gen-arm-mve-builtin-def", @@ -438,6 +459,27 @@ case GenArmSveRangeChecks: EmitSveRangeChecks(Records, OS); break; + case GenArmSveStreamingAttrs: + EmitSveStreamingAttrs(Records, OS); + break; + case GenArmSmeHeader: + EmitSmeHeader(Records, OS); + break; + case GenArmSmeBuiltins: + EmitSmeBuiltins(Records, OS); + break; + case GenArmSmeBuiltinCG: + EmitSmeBuiltinCG(Records, OS); + break; + case GenArmSmeTypeFlags: + EmitSmeTypeFlags(Records, OS); + break; + case GenArmSmeRangeChecks: + EmitSmeRangeChecks(Records, OS); + break; + case GenArmSmeStreamingAttrs: + EmitSmeStreamingAttrs(Records, OS); + break; case GenArmCdeHeader: EmitCdeHeader(Records, OS); break; diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h --- a/clang/utils/TableGen/TableGenBackends.h +++ b/clang/utils/TableGen/TableGenBackends.h @@ -100,6 +100,14 @@ void EmitSveBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitSveRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSveStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); + +void EmitSmeHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltins(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeBuiltinCG(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeTypeFlags(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeRangeChecks(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); +void EmitSmeStreamingAttrs(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveHeader(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); void EmitMveBuiltinDef(llvm::RecordKeeper &Records, llvm::raw_ostream &OS); diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3623,6 +3623,31 @@ x86_mmx +AArch64 
Predicate-as-Counter Type +""""""""""""""""""""""""""""""""" + +:Overview: + +The ``aarch64_svcount`` type represents the type of a predicate-as-counter value +held in an AArch64 SVE predicate register. Such a value contains information +about the number of active lanes, the element width and a bit that tells whether +the generated mask should be inverted. Target-specific intrinsics should be used +to move the ``aarch64_svcount`` value to/from a predicate vector. + +There are certain limitations on the type: +* The type can be used for function parameters and return values. +* The supported LLVM operations on this type are strictly limited to ``load``, + ``store``, ``phi`` and ``alloca`` instructions. + +The predicate-as-counter type is a scalable type. + +:Syntax: + +:: + + aarch64_svcount + + .. _t_pointer: Pointer Type diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -654,9 +654,9 @@ } /// Return an expression for sizeof ScalableTy that is type IntTy, where - /// ScalableTy is a scalable vector type. - const SCEV *getSizeOfScalableVectorExpr(Type *IntTy, - ScalableVectorType *ScalableTy); + /// ScalableTy is a scalable vector type or an AArch64 predicate-as-counter + /// (opaque) type. 
+ const SCEV *getSizeOfScalableTypeExpr(Type *IntTy, Type *ScalableTy); /// Return an expression for the alloc size of AllocTy that is type IntTy const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy); diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h --- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h +++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h @@ -175,6 +175,7 @@ TYPE_CODE_X86_AMX = 24, // X86 AMX TYPE_CODE_OPAQUE_POINTER = 25, // OPAQUE_POINTER: [addrspace] + TYPE_CODE_AARCH64_SVCOUNT = 26, // AArch64 predicate-as-counter }; enum OperandBundleTagCode { diff --git a/llvm/include/llvm/CodeGen/ValueTypes.h b/llvm/include/llvm/CodeGen/ValueTypes.h --- a/llvm/include/llvm/CodeGen/ValueTypes.h +++ b/llvm/include/llvm/CodeGen/ValueTypes.h @@ -122,7 +122,7 @@ /// Test if the given EVT has zero size, this will fail if called on a /// scalable type bool isZeroSized() const { - return !isScalableVector() && getSizeInBits() == 0; + return getSizeInBits().getKnownMinValue() == 0; } /// Test if the given EVT is simple (as opposed to being extended). diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td --- a/llvm/include/llvm/CodeGen/ValueTypes.td +++ b/llvm/include/llvm/CodeGen/ValueTypes.td @@ -227,6 +227,8 @@ def externref : ValueType<0, 184>; // WebAssembly's externref type def x86amx : ValueType<8192, 185>; // X86 AMX value def i64x8 : ValueType<512, 186>; // 8 Consecutive GPRs (AArch64) +def aarch64svcount + : ValueType<16, 187>; // AArch64 predicate-as-counter def token : ValueType<0, 248>; // TokenTy def MetadataVT : ValueType<0, 249>; // Metadata diff --git a/llvm/include/llvm/IR/DataLayout.h b/llvm/include/llvm/IR/DataLayout.h --- a/llvm/include/llvm/IR/DataLayout.h +++ b/llvm/include/llvm/IR/DataLayout.h @@ -705,6 +705,8 @@ // only 80 bits contain information. 
case Type::X86_FP80TyID: return TypeSize::Fixed(80); + case Type::AArch64SvcountTyID: + return TypeSize::Scalable(16); case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { VectorType *VTy = cast(Ty); diff --git a/llvm/include/llvm/IR/Intrinsics.h b/llvm/include/llvm/IR/Intrinsics.h --- a/llvm/include/llvm/IR/Intrinsics.h +++ b/llvm/include/llvm/IR/Intrinsics.h @@ -143,6 +143,7 @@ AMX, PPCQuad, AnyPtrToElt, + AArch64Svcount, } Kind; union { diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -269,6 +269,8 @@ def llvm_x86mmx_ty : LLVMType; def llvm_ptrx86mmx_ty : LLVMPointerType; // <1 x i64>* +def llvm_aarch64_svcount_ty : LLVMType; + def llvm_x86amx_ty : LLVMType; def llvm_v2i1_ty : LLVMType; // 2 x i1 diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2522,6 +2522,46 @@ def int_aarch64_sve_bfdot_lane : SVE_4Vec_BF16_Indexed; def int_aarch64_sve_bfmlalb_lane : SVE_4Vec_BF16_Indexed; def int_aarch64_sve_bfmlalt_lane : SVE_4Vec_BF16_Indexed; + +// +// SVE2.1 - Contiguous loads to multiple consecutive vectors +// + + class SVE2p1_Load_PN_VG2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + + class SVE2p1_Load_PN_VG4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrReadMem, IntrArgMemOnly]>; + +def int_aarch64_sve_ld1_pn_vg2 : SVE2p1_Load_PN_VG2_Intrinsic; +def int_aarch64_sve_ld1_pn_vg4 : SVE2p1_Load_PN_VG4_Intrinsic; +def int_aarch64_sve_ldnt1_pn_vg2 : SVE2p1_Load_PN_VG2_Intrinsic; +def int_aarch64_sve_ldnt1_pn_vg4 : SVE2p1_Load_PN_VG4_Intrinsic; + +// +// SVE2.1 - Contiguous stores to 
multiple consecutive vectors +// + + class SVE2p1_Store_PN_VG2_Intrinsic + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>, + llvm_aarch64_svcount_ty, llvm_ptr_ty ], + [IntrWriteMem, IntrArgMemOnly]>; + + class SVE2p1_Store_PN_VG4_Intrinsic + : DefaultAttrsIntrinsic<[], [ llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, + llvm_aarch64_svcount_ty, llvm_ptr_ty], + [IntrWriteMem, IntrArgMemOnly]>; + +def int_aarch64_sve_st1_pn_vg2 : SVE2p1_Store_PN_VG2_Intrinsic; +def int_aarch64_sve_st1_pn_vg4 : SVE2p1_Store_PN_VG4_Intrinsic; +def int_aarch64_sve_stnt1_pn_vg2 : SVE2p1_Store_PN_VG2_Intrinsic; +def int_aarch64_sve_stnt1_pn_vg4 : SVE2p1_Store_PN_VG4_Intrinsic; } // @@ -2541,7 +2581,7 @@ let TargetPrefix = "aarch64" in { class SME_Load_Store_Intrinsic : DefaultAttrsIntrinsic<[], - [pred_ty, llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty], []>; + [pred_ty, llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; // Loads def int_aarch64_sme_ld1b_horiz : SME_Load_Store_Intrinsic; @@ -2575,11 +2615,12 @@ class SME_TileToVector_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty], - [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i64_ty, llvm_i32_ty]>; + [LLVMMatchType<0>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_i32_ty, llvm_i32_ty], [ImmArg>]>; class SME_VectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_i64_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyvector_ty]>; + [llvm_i32_ty, llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, + llvm_anyvector_ty], [ImmArg>]>; def int_aarch64_sme_read_horiz : SME_TileToVector_Intrinsic; def int_aarch64_sme_read_vert : SME_TileToVector_Intrinsic; @@ -2591,15 +2632,15 @@ def int_aarch64_sme_writeq_horiz : SME_VectorToTile_Intrinsic; def int_aarch64_sme_writeq_vert : SME_VectorToTile_Intrinsic; - def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i64_ty]>; + def int_aarch64_sme_zero : DefaultAttrsIntrinsic<[], [llvm_i32_ty], 
[ImmArg>]>; class SME_OuterProduct_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_i64_ty, + [llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, - llvm_anyvector_ty]>; + llvm_anyvector_ty], [ImmArg>]>; def int_aarch64_sme_mopa : SME_OuterProduct_Intrinsic; def int_aarch64_sme_mops : SME_OuterProduct_Intrinsic; @@ -2618,10 +2659,10 @@ class SME_AddVectorToTile_Intrinsic : DefaultAttrsIntrinsic<[], - [llvm_i64_ty, + [llvm_i32_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, - llvm_anyvector_ty]>; + llvm_anyvector_ty], [ImmArg>]>; def int_aarch64_sme_addha : SME_AddVectorToTile_Intrinsic; def int_aarch64_sme_addva : SME_AddVectorToTile_Intrinsic; @@ -2638,6 +2679,8 @@ def int_aarch64_sme_cntsw : AdvSIMD_SME_CNTSB_Intrinsic; def int_aarch64_sme_cntsd : AdvSIMD_SME_CNTSB_Intrinsic; + def int_aarch64_sme_get_live_za_slices : AdvSIMD_SME_CNTSB_Intrinsic; + // // PSTATE Functions // @@ -2649,6 +2692,10 @@ : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrNoMem, IntrHasSideEffects]>; + def int_aarch64_sme_invoke_resume_pstatesm + : DefaultAttrsIntrinsic<[], [llvm_i64_ty], [IntrHasSideEffects]>; + + // def int_aarch64_sme_za_enable : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>; def int_aarch64_sme_za_disable @@ -2674,4 +2721,194 @@ : DefaultAttrsIntrinsic<[llvm_anyvector_ty], [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, llvm_i32_ty]>; + + // + // Predicate-as-counter intrinsics + // + + def int_aarch64_sve_pext + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_aarch64_svcount_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_aarch64_sve_ptrue_c8 + : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>; + def int_aarch64_sve_ptrue_c16 + : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>; + def int_aarch64_sve_ptrue_c32 + : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>; + def 
int_aarch64_sve_ptrue_c64 + : DefaultAttrsIntrinsic<[llvm_aarch64_svcount_ty], [], [IntrNoMem]>; + + // + // SME2 Intrinsics + // + + class SME2_Matrix_ArrayVector_Single_Single_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>], + []>; + + class SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>], + []>; + + class SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>], + []>; + + class SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + []>; + + class SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + []>; + + class SME2_Matrix_ArrayVector_Single_Index_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, + LLVMMatchType<0>, llvm_i32_ty], + [ImmArg>]>; + + class SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty], + [ImmArg>]>; + + class SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, + LLVMMatchType<0>, llvm_i32_ty], + [ImmArg>]>; + + class SME2_VG2_Multi_Imm_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>, + llvm_i32_ty], + [ImmArg>]>; + + class SME2_VG4_Multi_Imm_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty], + [llvm_anyvector_ty, LLVMMatchType<1>, + 
LLVMMatchType<1>, LLVMMatchType<1>, + llvm_i32_ty], + [ImmArg>]>; + + class SME2_ZA_Read_VG2_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], + [llvm_i32_ty], + []>; + + class SME2_ZA_Read_VG4_Intrinsic + : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_i32_ty], + []>; + + class SME2_ZA_Write_VG2_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>], + []>; + + class SME2_ZA_Write_VG4_Intrinsic + : DefaultAttrsIntrinsic<[], + [llvm_i32_ty, + llvm_anyvector_ty, LLVMMatchType<0>, + LLVMMatchType<0>, LLVMMatchType<0>], + []>; + + // + // Multi-vector fused multiply-add/subtract + // + + def int_aarch64_sme_fmla_single_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic; + def int_aarch64_sme_fmls_single_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic; + def int_aarch64_sme_fmla_single_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic; + def int_aarch64_sme_fmls_single_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic; + + def int_aarch64_sme_fmla_multi_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic; + def int_aarch64_sme_fmls_multi_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic; + def int_aarch64_sme_fmla_multi_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic; + def int_aarch64_sme_fmls_multi_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic; + + def int_aarch64_sme_fmla_lane_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic; + def int_aarch64_sme_fmls_lane_vg1x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic; + def int_aarch64_sme_fmla_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + def int_aarch64_sme_fmls_lane_vg1x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + + // + // Multi-vector multiply-add/subtract long + // + + foreach ty = ["f", "s", "u"] in { + foreach instr = ["mlal", "mlsl"] in { + def int_aarch64_sme_ # ty # instr 
# _single_vg2x1 : SME2_Matrix_ArrayVector_Single_Single_Intrinsic; + def int_aarch64_sme_ # ty # instr # _single_vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Single_Intrinsic; + def int_aarch64_sme_ # ty # instr # _single_vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Single_Intrinsic; + + def int_aarch64_sme_ # ty # instr # _multi_vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Multi_Intrinsic; + def int_aarch64_sme_ # ty # instr # _multi_vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Multi_Intrinsic; + + def int_aarch64_sme_ # ty # instr # _lane_vg2x1 : SME2_Matrix_ArrayVector_Single_Index_Intrinsic; + def int_aarch64_sme_ # ty # instr # _lane_vg2x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic; + def int_aarch64_sme_ # ty # instr # _lane_vg2x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic; + } + } + + // Multi-vector saturating rounding shift right intrinsics + + def int_aarch64_sve_sqrshr_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshr_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshr_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshr_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshrn_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshrn_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshrn_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + def int_aarch64_sve_uqrshrn_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshru_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshru_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + + def int_aarch64_sve_sqrshrun_vgx2 : SME2_VG2_Multi_Imm_Intrinsic; + def int_aarch64_sve_sqrshrun_vgx4 : SME2_VG4_Multi_Imm_Intrinsic; + + // Move multi-vectors to/from ZA + + def int_aarch64_sme_read_hor_vg2 : SME2_ZA_Read_VG2_Intrinsic; + def int_aarch64_sme_read_hor_vg4 : SME2_ZA_Read_VG4_Intrinsic; + + def int_aarch64_sme_read_ver_vg2 : SME2_ZA_Read_VG2_Intrinsic; + def int_aarch64_sme_read_ver_vg4 : SME2_ZA_Read_VG4_Intrinsic; + + def 
int_aarch64_sme_read_vg1x2 : SME2_ZA_Read_VG2_Intrinsic; + def int_aarch64_sme_read_vg1x4 : SME2_ZA_Read_VG4_Intrinsic; + + def int_aarch64_sme_write_hor_vg2 : SME2_ZA_Write_VG2_Intrinsic; + def int_aarch64_sme_write_hor_vg4 : SME2_ZA_Write_VG4_Intrinsic; + + def int_aarch64_sme_write_ver_vg2 : SME2_ZA_Write_VG2_Intrinsic; + def int_aarch64_sme_write_ver_vg4 : SME2_ZA_Write_VG4_Intrinsic; + + def int_aarch64_sme_write_vg1x2 : SME2_ZA_Write_VG2_Intrinsic; + def int_aarch64_sme_write_vg1x4 : SME2_ZA_Write_VG4_Intrinsic; } diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h --- a/llvm/include/llvm/IR/Type.h +++ b/llvm/include/llvm/IR/Type.h @@ -65,6 +65,7 @@ MetadataTyID, ///< Metadata X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific) X86_AMXTyID, ///< AMX vectors (8192 bits, X86 specific) + AArch64SvcountTyID, ///< AArch64 predicate-as-counter (vscale x 16 bits) TokenTyID, ///< Tokens // Derived types... see DerivedTypes.h file. @@ -180,6 +181,9 @@ /// Return true if this is X86 AMX. bool isX86_AMXTy() const { return getTypeID() == X86_AMXTyID; } + /// Return true if this is an AArch64 predicate-as-counter type. + bool isAArch64SvcountTy() const { return getTypeID() == AArch64SvcountTyID; } + /// Return true if this is a FP type or a vector of FP. bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } @@ -270,7 +274,7 @@ // If it's a primitive, it is always sized. if (getTypeID() == IntegerTyID || isFloatingPointTy() || getTypeID() == PointerTyID || getTypeID() == X86_MMXTyID || - getTypeID() == X86_AMXTyID) + getTypeID() == X86_AMXTyID || getTypeID() == AArch64SvcountTyID) return true; // If it is not something that can have a size (e.g. a function or label), // it doesn't have a size. 
@@ -430,6 +434,7 @@ static Type *getPPC_FP128Ty(LLVMContext &C); static Type *getX86_MMXTy(LLVMContext &C); static Type *getX86_AMXTy(LLVMContext &C); + static Type *getAArch64SvcountTy(LLVMContext &C); static Type *getTokenTy(LLVMContext &C); static IntegerType *getIntNTy(LLVMContext &C, unsigned N); static IntegerType *getInt1Ty(LLVMContext &C); diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h --- a/llvm/include/llvm/Support/MachineValueType.h +++ b/llvm/include/llvm/Support/MachineValueType.h @@ -281,9 +281,10 @@ externref = 184, // WebAssembly's externref type x86amx = 185, // This is an X86 AMX value i64x8 = 186, // 8 Consecutive GPRs (AArch64) + aarch64svcount = 187, // AArch64 predicate-as-counter FIRST_VALUETYPE = 1, // This is always the beginning of the list. - LAST_VALUETYPE = i64x8, // This always remains at the end of the list. + LAST_VALUETYPE = aarch64svcount, // This always remains at the end of the list. VALUETYPE_SIZE = LAST_VALUETYPE + 1, // This is the current maximum for LAST_VALUETYPE. @@ -927,6 +928,7 @@ case v2i8: case v1i16: case v1f16: return TypeSize::Fixed(16); + case aarch64svcount: case nxv16i1: case nxv2i8: case nxv1i16: diff --git a/llvm/lib/Analysis/Loads.cpp b/llvm/lib/Analysis/Loads.cpp --- a/llvm/lib/Analysis/Loads.cpp +++ b/llvm/lib/Analysis/Loads.cpp @@ -202,7 +202,7 @@ const TargetLibraryInfo *TLI) { // For unsized types or scalable vectors we don't know exactly how many bytes // are dereferenced, so bail out. 
- if (!Ty->isSized() || isa(Ty)) + if (!Ty->isSized() || isa(Ty) || Ty->isAArch64SvcountTy()) return false; // When dereferenceability information is provided by a dereferenceable diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4250,8 +4250,10 @@ } const SCEV * -ScalarEvolution::getSizeOfScalableVectorExpr(Type *IntTy, - ScalableVectorType *ScalableTy) { +ScalarEvolution::getSizeOfScalableTypeExpr(Type *IntTy, Type *ScalableTy) { + assert((isa(ScalableTy) || + ScalableTy->isAArch64SvcountTy()) && + "Expected a scalable type"); Constant *NullPtr = Constant::getNullValue(ScalableTy->getPointerTo()); Constant *One = ConstantInt::get(IntTy, 1); Constant *GEP = ConstantExpr::getGetElementPtr(ScalableTy, NullPtr, One); @@ -4262,8 +4264,9 @@ } const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { - if (auto *ScalableAllocTy = dyn_cast(AllocTy)) - return getSizeOfScalableVectorExpr(IntTy, ScalableAllocTy); + if (isa(AllocTy) || AllocTy->isAArch64SvcountTy()) + return getSizeOfScalableTypeExpr(IntTy, AllocTy); + // We can bypass creating a target-independent constant expression and then // folding it back into a ConstantInt. This is just a compile-time // optimization. @@ -4271,8 +4274,9 @@ } const SCEV *ScalarEvolution::getStoreSizeOfExpr(Type *IntTy, Type *StoreTy) { - if (auto *ScalableStoreTy = dyn_cast(StoreTy)) - return getSizeOfScalableVectorExpr(IntTy, ScalableStoreTy); + if (isa(StoreTy) || StoreTy->isAArch64SvcountTy()) + return getSizeOfScalableTypeExpr(IntTy, StoreTy); + // We can bypass creating a target-independent constant expression and then // folding it back into a ConstantInt. This is just a compile-time // optimization. 
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp --- a/llvm/lib/AsmParser/LLLexer.cpp +++ b/llvm/lib/AsmParser/LLLexer.cpp @@ -793,6 +793,7 @@ TYPEKEYWORD("metadata", Type::getMetadataTy(Context)); TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context)); TYPEKEYWORD("x86_amx", Type::getX86_AMXTy(Context)); + TYPEKEYWORD("aarch64_svcount", Type::getAArch64SvcountTy(Context)); TYPEKEYWORD("token", Type::getTokenTy(Context)); if (Keyword == "ptr") { diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp @@ -185,6 +185,7 @@ STRINGIFY_CODE(TYPE_CODE, PPC_FP128) STRINGIFY_CODE(TYPE_CODE, METADATA) STRINGIFY_CODE(TYPE_CODE, X86_MMX) + STRINGIFY_CODE(TYPE_CODE, AARCH64_SVCOUNT) STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2256,6 +2256,9 @@ case bitc::TYPE_CODE_X86_AMX: // X86_AMX ResultTy = Type::getX86_AMXTy(Context); break; + case bitc::TYPE_CODE_AARCH64_SVCOUNT: // AArch64Svcount + ResultTy = Type::getAArch64SvcountTy(Context); + break; case bitc::TYPE_CODE_TOKEN: // TOKEN ResultTy = Type::getTokenTy(Context); break; diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp --- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -950,6 +950,9 @@ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::X86_AMXTyID: Code = bitc::TYPE_CODE_X86_AMX; break; + case Type::AArch64SvcountTyID: + Code = bitc::TYPE_CODE_AARCH64_SVCOUNT; + break; case Type::TokenTyID: Code = 
bitc::TYPE_CODE_TOKEN; break; case Type::IntegerTyID: // INTEGER: [width] diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -7696,7 +7696,7 @@ // whereas scalable vectors would have to be shifted by // <2log(vscale) + number of bits> in order to store the // low/high parts. Bailing out for now. - if (isa(StoreType)) + if (isa(StoreType) || StoreType->isAArch64SvcountTy()) return false; if (!DL.typeSizeEqualsStoreSize(StoreType) || diff --git a/llvm/lib/CodeGen/LowLevelType.cpp b/llvm/lib/CodeGen/LowLevelType.cpp --- a/llvm/lib/CodeGen/LowLevelType.cpp +++ b/llvm/lib/CodeGen/LowLevelType.cpp @@ -31,7 +31,7 @@ return LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace)); } - if (Ty.isSized()) { + if (Ty.isSized() && !Ty.isAArch64SvcountTy()) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. auto SizeInBits = DL.getTypeSizeInBits(&Ty); diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17146,6 +17146,9 @@ SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); + if (N->getValueType(0) == MVT::aarch64svcount) + return SDValue(); + // If load is not volatile and there are no uses of the loaded value (and // the updated indexed value in case of indexed loads), change uses of the // chain value into uses of the chain input (i.e. delete the dead load). @@ -19370,6 +19373,9 @@ SDValue Value = ST->getValue(); SDValue Ptr = ST->getBasePtr(); + if (Value.getValueType() == MVT::aarch64svcount) + return SDValue(); + // If this is a store of a bit convert, store the input value if the // resultant store does not need a higher alignment than the original. 
if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -494,7 +494,6 @@ return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V, CallConv); - unsigned PartBits = PartVT.getSizeInBits(); unsigned OrigNumParts = NumParts; assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) && "Copying to an illegal type!"); @@ -510,6 +509,7 @@ return; } + unsigned PartBits = PartVT.getSizeInBits(); if (NumParts * PartBits > ValueVT.getSizeInBits()) { // If the parts cover more bits than the value has, promote the value. if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp --- a/llvm/lib/CodeGen/ValueTypes.cpp +++ b/llvm/lib/CodeGen/ValueTypes.cpp @@ -173,6 +173,8 @@ case MVT::Untyped: return "Untyped"; case MVT::funcref: return "funcref"; case MVT::externref: return "externref"; + case MVT::aarch64svcount: + return "aarch64svcount"; } } @@ -202,6 +204,8 @@ case MVT::f128: return Type::getFP128Ty(Context); case MVT::ppcf128: return Type::getPPC_FP128Ty(Context); case MVT::x86mmx: return Type::getX86_MMXTy(Context); + case MVT::aarch64svcount: + return Type::getAArch64SvcountTy(Context); case MVT::x86amx: return Type::getX86_AMXTy(Context); case MVT::i64x8: return IntegerType::get(Context, 512); case MVT::externref: @@ -557,6 +561,8 @@ case Type::DoubleTyID: return MVT(MVT::f64); case Type::X86_FP80TyID: return MVT(MVT::f80); case Type::X86_MMXTyID: return MVT(MVT::x86mmx); + case Type::AArch64SvcountTyID: + return MVT(MVT::aarch64svcount); case Type::X86_AMXTyID: return MVT(MVT::x86amx); case Type::FP128TyID: return MVT(MVT::f128); case Type::PPC_FP128TyID: return MVT(MVT::ppcf128); diff --git a/llvm/lib/IR/AsmWriter.cpp 
b/llvm/lib/IR/AsmWriter.cpp --- a/llvm/lib/IR/AsmWriter.cpp +++ b/llvm/lib/IR/AsmWriter.cpp @@ -548,6 +548,9 @@ case Type::MetadataTyID: OS << "metadata"; return; case Type::X86_MMXTyID: OS << "x86_mmx"; return; case Type::X86_AMXTyID: OS << "x86_amx"; return; + case Type::AArch64SvcountTyID: + OS << "aarch64_svcount"; + return; case Type::TokenTyID: OS << "token"; return; case Type::IntegerTyID: OS << 'i' << cast(Ty)->getBitWidth(); diff --git a/llvm/lib/IR/DataLayout.cpp b/llvm/lib/IR/DataLayout.cpp --- a/llvm/lib/IR/DataLayout.cpp +++ b/llvm/lib/IR/DataLayout.cpp @@ -797,6 +797,7 @@ // layout. return Align(PowerOf2Ceil(BitWidth / 8)); } + case Type::AArch64SvcountTyID: case Type::X86_MMXTyID: case Type::FixedVectorTyID: case Type::ScalableVectorTyID: { diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -990,6 +990,7 @@ IIT_ANYPTR_TO_ELT = 56, IIT_I2 = 57, IIT_I4 = 58, + IIT_AARCH64_SVCOUNT = 59, }; static void DecodeIITType(unsigned &NextElt, ArrayRef Infos, @@ -1048,6 +1049,9 @@ case IIT_I4: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 4)); return; + case IIT_AARCH64_SVCOUNT: + OutputTable.push_back(IITDescriptor::get(IITDescriptor::AArch64Svcount, 0)); + return; case IIT_I8: OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 8)); return; @@ -1289,6 +1293,7 @@ case IITDescriptor::Double: return Type::getDoubleTy(Context); case IITDescriptor::Quad: return Type::getFP128Ty(Context); case IITDescriptor::PPCQuad: return Type::getPPC_FP128Ty(Context); + case IITDescriptor::AArch64Svcount: return Type::getAArch64SvcountTy(Context); case IITDescriptor::Integer: return IntegerType::get(Context, D.Integer_Width); @@ -1475,6 +1480,7 @@ case IITDescriptor::Quad: return !Ty->isFP128Ty(); case IITDescriptor::PPCQuad: return !Ty->isPPC_FP128Ty(); case IITDescriptor::Integer: return !Ty->isIntegerTy(D.Integer_Width); + case IITDescriptor::AArch64Svcount: return 
!Ty->isAArch64SvcountTy(); case IITDescriptor::Vector: { VectorType *VT = dyn_cast(Ty); return !VT || VT->getElementCount() != D.Vector_Width || diff --git a/llvm/lib/IR/LLVMContextImpl.h b/llvm/lib/IR/LLVMContextImpl.h --- a/llvm/lib/IR/LLVMContextImpl.h +++ b/llvm/lib/IR/LLVMContextImpl.h @@ -1464,7 +1464,7 @@ // Basic type instances. Type VoidTy, LabelTy, HalfTy, BFloatTy, FloatTy, DoubleTy, MetadataTy, TokenTy; - Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy; + Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy, X86_AMXTy, AArch64SvcountTy; IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty, Int128Ty; std::unique_ptr TheNoneToken; diff --git a/llvm/lib/IR/LLVMContextImpl.cpp b/llvm/lib/IR/LLVMContextImpl.cpp --- a/llvm/lib/IR/LLVMContextImpl.cpp +++ b/llvm/lib/IR/LLVMContextImpl.cpp @@ -45,7 +45,8 @@ MetadataTy(C, Type::MetadataTyID), TokenTy(C, Type::TokenTyID), X86_FP80Ty(C, Type::X86_FP80TyID), FP128Ty(C, Type::FP128TyID), PPC_FP128Ty(C, Type::PPC_FP128TyID), X86_MMXTy(C, Type::X86_MMXTyID), - X86_AMXTy(C, Type::X86_AMXTyID), Int1Ty(C, 1), Int8Ty(C, 8), + X86_AMXTy(C, Type::X86_AMXTyID), + AArch64SvcountTy(C, Type::AArch64SvcountTyID), Int1Ty(C, 1), Int8Ty(C, 8), Int16Ty(C, 16), Int32Ty(C, 32), Int64Ty(C, 64), Int128Ty(C, 128) { if (OpaquePointersCL.getNumOccurrences()) { OpaquePointers = OpaquePointersCL; diff --git a/llvm/lib/IR/Type.cpp b/llvm/lib/IR/Type.cpp --- a/llvm/lib/IR/Type.cpp +++ b/llvm/lib/IR/Type.cpp @@ -232,7 +232,9 @@ Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; } Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; } Type *Type::getX86_AMXTy(LLVMContext &C) { return &C.pImpl->X86_AMXTy; } - +Type *Type::getAArch64SvcountTy(LLVMContext &C) { + return &C.pImpl->AArch64SvcountTy; +} IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; } IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; } IntegerType *Type::getInt16Ty(LLVMContext &C) { return 
&C.pImpl->Int16Ty; } diff --git a/llvm/lib/Support/LowLevelType.cpp b/llvm/lib/Support/LowLevelType.cpp --- a/llvm/lib/Support/LowLevelType.cpp +++ b/llvm/lib/Support/LowLevelType.cpp @@ -21,7 +21,7 @@ init(/*IsPointer=*/false, asVector, /*IsScalar=*/!asVector, VT.getVectorElementCount(), VT.getVectorElementType().getSizeInBits(), /*AddressSpace=*/0); - } else if (VT.isValid()) { + } else if (VT.isValid() && VT != MVT::aarch64svcount) { // Aggregates are no different from real scalars as far as GlobalISel is // concerned. init(/*IsPointer=*/false, /*IsVector=*/false, /*IsScalar=*/true, diff --git a/llvm/lib/Target/AArch64/AArch64.h b/llvm/lib/Target/AArch64/AArch64.h --- a/llvm/lib/Target/AArch64/AArch64.h +++ b/llvm/lib/Target/AArch64/AArch64.h @@ -54,6 +54,7 @@ FunctionPass *createFalkorMarkStridedAccessesPass(); FunctionPass *createAArch64BranchTargetsPass(); FunctionPass *createAArch64MIPeepholeOptPass(); +FunctionPass *createSMEPeepholeOptPass(); FunctionPass *createAArch64CleanupLocalDynamicTLSPass(); @@ -89,6 +90,7 @@ void initializeAArch64LoadStoreOptPass(PassRegistry&); void initializeAArch64LowerHomogeneousPrologEpilogPass(PassRegistry &); void initializeAArch64MIPeepholeOptPass(PassRegistry &); +void initializeSMEPeepholeOptPass(PassRegistry &); void initializeAArch64SIMDInstrOptPass(PassRegistry&); void initializeAArch64O0PreLegalizerCombinerPass(PassRegistry &); void initializeAArch64PreLegalizerCombinerPass(PassRegistry&); diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td --- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td +++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td @@ -82,9 +82,9 @@ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], CCPassIndirect>, - CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount], CCAssignToReg<[P0, P1, P2, P3]>>, - CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, 
nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount], CCPassIndirect>, // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, @@ -149,7 +149,7 @@ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64], CCAssignToReg<[Z0, Z1, Z2, Z3, Z4, Z5, Z6, Z7]>>, - CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1], + CCIfType<[nxv1i1, nxv2i1, nxv4i1, nxv8i1, nxv16i1, aarch64svcount], CCAssignToReg<[P0, P1, P2, P3]>> ]>; diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -5032,6 +5032,8 @@ } bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { + if (TLI.fallBackToDAGISel(*I)) + return false; switch (I->getOpcode()) { default: break; @@ -5114,5 +5116,9 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { + + SMEAttrs CallerAttrs(*FuncInfo.Fn); + if (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()) + return nullptr; return new AArch64FastISel(FuncInfo, LibInfo); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -325,6 +325,11 @@ void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale, unsigned Opc_rr, unsigned Opc_ri, bool IsIntr = false); + void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs, + unsigned Scale, unsigned Opc_ri, + unsigned Opc_rr); + void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg, + unsigned Op); bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm); /// SVE Reg+Imm addressing mode. 
@@ -337,9 +342,9 @@ return SelectSVERegRegAddrMode(N, Scale, Base, Offset); } - template + template bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) { - return SelectSMETileSlice(N, Scale, Vector, Offset); + return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale); } void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc); @@ -410,8 +415,8 @@ bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, SDValue &Offset); - bool SelectSMETileSlice(SDValue N, unsigned Scale, SDValue &Vector, - SDValue &Offset); + bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector, + SDValue &Offset, unsigned Scale = 1); bool SelectAllActivePredicate(SDValue N); }; @@ -1604,6 +1609,99 @@ CurDAG->RemoveDeadNode(N); } +void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N, + unsigned NumVecs, + unsigned Scale, + unsigned Opc_ri, + unsigned Opc_rr) { + assert(Scale < 4 && "Invalid scaling value."); + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue Chain = N->getOperand(0); + + // Use simplest addressing mode for now - base + 0 offset + SDValue PNg = N->getOperand(2); + SDValue Base = N->getOperand(3); + SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64); + + SDValue Ops[] = {PNg, // Predicate-as-counter + Base, // Memory operand + Offset, Chain}; + + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; + + SDNode *Load = CurDAG->getMachineNode(Opc_ri, DL, ResTys, Ops); + SDValue SuperReg = SDValue(Load, 0); + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( + AArch64::zsub0 + i, DL, VT, SuperReg)); + + // Copy chain + unsigned ChainIdx = NumVecs; + ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); + CurDAG->RemoveDeadNode(N); +} + +void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, + unsigned BaseReg, unsigned Op) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue 
Chain = N->getOperand(0); + SDValue Base, Offset; + + SDValue TileOp = N->getOperand(2); + SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other); + + bool IsTileSlice; + switch (BaseReg) { + default: + IsTileSlice = false; + break; + case AArch64::ZAB0: + if (NumVecs == 2) + IsTileSlice = SelectSMETileSlice<14, 2>(TileOp, Base, Offset); + else + IsTileSlice = SelectSMETileSlice<12, 4>(TileOp, Base, Offset); + break; + case AArch64::ZAH0: + if (NumVecs == 2) + IsTileSlice = SelectSMETileSlice<6, 2>(TileOp, Base, Offset); + else + IsTileSlice = SelectSMETileSlice<4, 4>(TileOp, Base, Offset); + break; + case AArch64::ZAS0: + if (NumVecs == 2) + IsTileSlice = SelectSMETileSlice<2, 2>(TileOp, Base, Offset); + else + IsTileSlice = SelectSMETileSlice<0, 4>(TileOp, Base, Offset); + break; + case AArch64::ZAD0: + if (NumVecs == 2) + IsTileSlice = SelectSMETileSlice<0, 2>(TileOp, Base, Offset); + else + IsTileSlice = SelectSMETileSlice<0, 2>(TileOp, Base, Offset); + break; + case AArch64::ZA: + IsTileSlice = SelectSMETileSlice<7, 1>(TileOp, Base, Offset); + break; + } + + assert(IsTileSlice && "Invalid offset value."); + + SDValue Ops[] = {SubReg, Base, Offset, Chain}; + const EVT ResTys[] = {MVT::Untyped, MVT::Other}; + + SDNode *Move = CurDAG->getMachineNode(Op, DL, ResTys, Ops); + SDValue SuperReg = SDValue(Move, 0); + for (unsigned i = 0; i < NumVecs; ++i) + ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( + AArch64::zsub0 + i, DL, VT, SuperReg)); + // Copy chain + unsigned ChainIdx = NumVecs; + ReplaceUses(SDValue(N, ChainIdx), SDValue(Move, 1)); + CurDAG->RemoveDeadNode(N); +} + void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); @@ -4186,6 +4284,74 @@ } break; } + case Intrinsic::aarch64_sve_ld1_pn_vg2: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LD1B_2ZCXI, AArch64::LD1B_2ZCXX); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { 
+ SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LD1H_2ZCXI, AArch64::LD1H_2ZCXX); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LD1W_2ZCXI, AArch64::LD1W_2ZCXX); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LD1D_2ZCXI, AArch64::LD1D_2ZCXX); + return; + } + break; + } + case Intrinsic::aarch64_sve_ld1_pn_vg4: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LD1B_4ZCXI, AArch64::LD1B_4ZCXX); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LD1H_4ZCXI, AArch64::LD1H_4ZCXX); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LD1W_4ZCXI, AArch64::LD1W_4ZCXX); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LD1D_4ZCXI, AArch64::LD1D_4ZCXX); + return; + } + break; + } + case Intrinsic::aarch64_sve_ldnt1_pn_vg2: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 2, 0, AArch64::LDNT1B_2ZCXI, AArch64::LDNT1B_2ZCXX); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 2, 1, AArch64::LDNT1H_2ZCXI, AArch64::LDNT1H_2ZCXX); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 2, 2, AArch64::LDNT1W_2ZCXI, AArch64::LDNT1W_2ZCXX); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 2, 3, AArch64::LDNT1D_2ZCXI, AArch64::LDNT1D_2ZCXX); + return; + } + break; + } + case Intrinsic::aarch64_sve_ldnt1_pn_vg4: { + if (VT == MVT::nxv16i8) { + SelectContiguousMultiVectorLoad(Node, 4, 0, AArch64::LDNT1B_4ZCXI, AArch64::LDNT1B_4ZCXX); + return; + } else if 
(VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || + VT == MVT::nxv8bf16) { + SelectContiguousMultiVectorLoad(Node, 4, 1, AArch64::LDNT1H_4ZCXI, AArch64::LDNT1H_4ZCXX); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectContiguousMultiVectorLoad(Node, 4, 2, AArch64::LDNT1W_4ZCXI, AArch64::LDNT1W_4ZCXX); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectContiguousMultiVectorLoad(Node, 4, 3, AArch64::LDNT1D_4ZCXI, AArch64::LDNT1D_4ZCXX); + return; + } + break; + } case Intrinsic::aarch64_sve_ld3_sret: { if (VT == MVT::nxv16i8) { SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, @@ -4228,6 +4394,94 @@ } break; } + case Intrinsic::aarch64_sme_read_hor_vg2: { + if (VT == MVT::nxv16i8) { + SelectMultiVectorMove(Node, 2, AArch64::ZAB0, + AArch64::EXTRACT_2ZTI_H_B); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || VT == MVT::nxv8bf16) { + SelectMultiVectorMove(Node, 2, AArch64::ZAH0, + AArch64::EXTRACT_2ZTI_H_H); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectMultiVectorMove(Node, 2, AArch64::ZAS0, + AArch64::EXTRACT_2ZTI_H_S); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectMultiVectorMove(Node, 2, AArch64::ZAD0, + AArch64::EXTRACT_2ZTI_H_D); + return; + } + break; + } + case Intrinsic::aarch64_sme_read_ver_vg2: { + if (VT == MVT::nxv16i8) { + SelectMultiVectorMove(Node, 2, AArch64::ZAB0, + AArch64::EXTRACT_2ZTI_V_B); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || VT == MVT::nxv8bf16) { + SelectMultiVectorMove(Node, 2, AArch64::ZAH0, + AArch64::EXTRACT_2ZTI_V_H); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectMultiVectorMove(Node, 2, AArch64::ZAS0, + AArch64::EXTRACT_2ZTI_V_S); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectMultiVectorMove(Node, 2, AArch64::ZAD0, + AArch64::EXTRACT_2ZTI_V_D); + return; + } + break; + } + case Intrinsic::aarch64_sme_read_hor_vg4: { + 
if (VT == MVT::nxv16i8) { + SelectMultiVectorMove(Node, 4, AArch64::ZAB0, + AArch64::EXTRACT_4ZTI_H_B); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || VT == MVT::nxv8bf16) { + SelectMultiVectorMove(Node, 4, AArch64::ZAH0, + AArch64::EXTRACT_4ZTI_H_H); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectMultiVectorMove(Node, 4, AArch64::ZAS0, + AArch64::EXTRACT_4ZTI_H_S); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectMultiVectorMove(Node, 4, AArch64::ZAD0, + AArch64::EXTRACT_4ZTI_H_D); + return; + } + break; + } + case Intrinsic::aarch64_sme_read_ver_vg4: { + if (VT == MVT::nxv16i8) { + SelectMultiVectorMove(Node, 4, AArch64::ZAB0, + AArch64::EXTRACT_4ZTI_V_B); + return; + } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || VT == MVT::nxv8bf16) { + SelectMultiVectorMove(Node, 4, AArch64::ZAH0, + AArch64::EXTRACT_4ZTI_V_H); + return; + } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { + SelectMultiVectorMove(Node, 4, AArch64::ZAS0, + AArch64::EXTRACT_4ZTI_V_S); + return; + } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { + SelectMultiVectorMove(Node, 4, AArch64::ZAD0, + AArch64::EXTRACT_4ZTI_V_D); + return; + } + break; + } + case Intrinsic::aarch64_sme_read_vg1x2: { + SelectMultiVectorMove(Node, 2, AArch64::ZA, AArch64::EXTRACT_VG2_2ZMI); + return; + } + case Intrinsic::aarch64_sme_read_vg1x4: { + SelectMultiVectorMove(Node, 4, AArch64::ZA, AArch64::EXTRACT_VG4_4ZMI); + return; + } case Intrinsic::swift_async_context_addr: { SDLoc DL(Node); SDValue Chain = Node->getOperand(0); @@ -5440,29 +5694,25 @@ return TLI->isAllActivePredicate(*CurDAG, N); } -bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned Scale, - SDValue &Base, SDValue &Offset) { +bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize, + SDValue &Base, SDValue &Offset, + unsigned Scale) { if (N.getOpcode() != ISD::ADD) { Base = N; Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64); 
return true; } - // Process an ADD node. const SDValue LHS = N.getOperand(0); const SDValue RHS = N.getOperand(1); - if (auto C = dyn_cast(RHS)) { int64_t ImmOff = C->getSExtValue(); - unsigned MaxSize = (1 << Scale) - 1; - - if (ImmOff < 0 || ImmOff > MaxSize) + if ((ImmOff < 0 || ImmOff > MaxSize) || + (ImmOff % Scale != 0)) return false; - Base = LHS; - Offset = CurDAG->getTargetConstant(ImmOff, SDLoc(N), MVT::i64); + Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64); return true; } - return false; } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -584,15 +584,10 @@ MachineInstr &MI, MachineBasicBlock *BB) const; MachineBasicBlock *EmitFill(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitMopa(unsigned Opc, unsigned BaseReg, MachineInstr &MI, - MachineBasicBlock *BB) const; - MachineBasicBlock *EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, - MachineBasicBlock *BB) const; + MachineBasicBlock *EmitZAInstr(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, MachineBasicBlock *BB, + bool HasTile) const; MachineBasicBlock *EmitZero(MachineInstr &MI, MachineBasicBlock *BB) const; - MachineBasicBlock *EmitAddVectorToTile(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, - MachineBasicBlock *BB) const; MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, @@ -904,7 +899,7 @@ void addQRTypeForNEON(MVT VT); unsigned allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, - SelectionDAG &DAG, Register &Reg) const; + SelectionDAG &DAG) const; SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ 
b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -375,6 +375,12 @@ } } + if (Subtarget->hasSVE2p1() || Subtarget->hasSME2()) { + addRegisterClass(MVT::aarch64svcount, &AArch64::PPRRegClass); + setOperationAction(ISD::SELECT, MVT::aarch64svcount, Custom); + setOperationAction(ISD::SELECT_CC, MVT::aarch64svcount, Expand); + } + // Compute derived properties from the register classes computeRegisterProperties(Subtarget->getRegisterInfo()); @@ -2482,35 +2488,22 @@ } MachineBasicBlock * -AArch64TargetLowering::EmitMopa(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, MachineBasicBlock *BB) const { +AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, + MachineInstr &MI, + MachineBasicBlock *BB, bool HasTile) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); + unsigned StartIdx = 0; - MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); - MIB.addReg(BaseReg + MI.getOperand(0).getImm()); - MIB.add(MI.getOperand(1)); // pn - MIB.add(MI.getOperand(2)); // pm - MIB.add(MI.getOperand(3)); // zn - MIB.add(MI.getOperand(4)); // zm - - MI.eraseFromParent(); // The pseudo is gone now. 
- return BB; -} + if (HasTile) { + MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); + MIB.addReg(BaseReg + MI.getOperand(0).getImm()); + StartIdx = 1; + } else + MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); -MachineBasicBlock * -AArch64TargetLowering::EmitInsertVectorToTile(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); - - MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); - MIB.addReg(BaseReg + MI.getOperand(0).getImm()); - MIB.add(MI.getOperand(1)); // Slice index register - MIB.add(MI.getOperand(2)); // Slice index offset - MIB.add(MI.getOperand(3)); // pg - MIB.add(MI.getOperand(4)); // zn + for (unsigned I = StartIdx; I < MI.getNumOperands(); ++I) + MIB.add(MI.getOperand(I)); MI.eraseFromParent(); // The pseudo is gone now. return BB; @@ -2533,25 +2526,30 @@ return BB; } -MachineBasicBlock * -AArch64TargetLowering::EmitAddVectorToTile(unsigned Opc, unsigned BaseReg, - MachineInstr &MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = Subtarget->getInstrInfo(); - MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); - - MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); - MIB.addReg(BaseReg + MI.getOperand(0).getImm()); - MIB.add(MI.getOperand(1)); // pn - MIB.add(MI.getOperand(2)); // pm - MIB.add(MI.getOperand(3)); // zn - - MI.eraseFromParent(); // The pseudo is gone now. 
- return BB; -} - MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( MachineInstr &MI, MachineBasicBlock *BB) const { + + int SMEOrigInstr = AArch64::getSMEPseudoMap(MI.getOpcode()); + if (SMEOrigInstr != -1) { + const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + uint64_t SMEMatrixType = + TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; + switch (SMEMatrixType) { + case (AArch64::SMEMatrixArray): + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false); + case (AArch64::SMEMatrixTileB): + return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, /*HasTile*/ true); + case (AArch64::SMEMatrixTileH): + return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, /*HasTile*/ true); + case (AArch64::SMEMatrixTileS): + return EmitZAInstr(SMEOrigInstr, AArch64::ZAS0, MI, BB, /*HasTile*/ true); + case (AArch64::SMEMatrixTileD): + return EmitZAInstr(SMEOrigInstr, AArch64::ZAD0, MI, BB, /*HasTile*/ true); + case (AArch64::SMEMatrixTileQ): + return EmitZAInstr(SMEOrigInstr, AArch64::ZAQ0, MI, BB, /*HasTile*/ true); + } + } + switch (MI.getOpcode()) { default: #ifndef NDEBUG @@ -2601,94 +2599,8 @@ return EmitTileLoad(AArch64::LD1_MXIPXX_V_Q, AArch64::ZAQ0, MI, BB); case AArch64::LDR_ZA_PSEUDO: return EmitFill(MI, BB); - case AArch64::BFMOPA_MPPZZ_PSEUDO: - return EmitMopa(AArch64::BFMOPA_MPPZZ, AArch64::ZAS0, MI, BB); - case AArch64::BFMOPS_MPPZZ_PSEUDO: - return EmitMopa(AArch64::BFMOPS_MPPZZ, AArch64::ZAS0, MI, BB); - case AArch64::FMOPAL_MPPZZ_PSEUDO: - return EmitMopa(AArch64::FMOPAL_MPPZZ, AArch64::ZAS0, MI, BB); - case AArch64::FMOPSL_MPPZZ_PSEUDO: - return EmitMopa(AArch64::FMOPSL_MPPZZ, AArch64::ZAS0, MI, BB); - case AArch64::FMOPA_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::FMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::FMOPS_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::FMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::FMOPA_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::FMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); 
- case AArch64::FMOPS_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::FMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::SMOPA_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::SMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::SMOPS_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::SMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::UMOPA_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::UMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::UMOPS_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::UMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::SUMOPA_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::SUMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::SUMOPS_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::SUMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::USMOPA_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::USMOPA_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::USMOPS_MPPZZ_S_PSEUDO: - return EmitMopa(AArch64::USMOPS_MPPZZ_S, AArch64::ZAS0, MI, BB); - case AArch64::SMOPA_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::SMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::SMOPS_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::SMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::UMOPA_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::UMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::UMOPS_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::UMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::SUMOPA_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::SUMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::SUMOPS_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::SUMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::USMOPA_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::USMOPA_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::USMOPS_MPPZZ_D_PSEUDO: - return EmitMopa(AArch64::USMOPS_MPPZZ_D, AArch64::ZAD0, MI, BB); - case AArch64::INSERT_MXIPZ_H_PSEUDO_B: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_B, AArch64::ZAB0, MI, - BB); - case AArch64::INSERT_MXIPZ_H_PSEUDO_H: - return 
EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_H, AArch64::ZAH0, MI, - BB); - case AArch64::INSERT_MXIPZ_H_PSEUDO_S: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_S, AArch64::ZAS0, MI, - BB); - case AArch64::INSERT_MXIPZ_H_PSEUDO_D: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_D, AArch64::ZAD0, MI, - BB); - case AArch64::INSERT_MXIPZ_H_PSEUDO_Q: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_H_Q, AArch64::ZAQ0, MI, - BB); - case AArch64::INSERT_MXIPZ_V_PSEUDO_B: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_B, AArch64::ZAB0, MI, - BB); - case AArch64::INSERT_MXIPZ_V_PSEUDO_H: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_H, AArch64::ZAH0, MI, - BB); - case AArch64::INSERT_MXIPZ_V_PSEUDO_S: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_S, AArch64::ZAS0, MI, - BB); - case AArch64::INSERT_MXIPZ_V_PSEUDO_D: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_D, AArch64::ZAD0, MI, - BB); - case AArch64::INSERT_MXIPZ_V_PSEUDO_Q: - return EmitInsertVectorToTile(AArch64::INSERT_MXIPZ_V_Q, AArch64::ZAQ0, MI, - BB); case AArch64::ZERO_M_PSEUDO: return EmitZero(MI, BB); - case AArch64::ADDHA_MPPZ_PSEUDO_S: - return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_S, AArch64::ZAS0, MI, BB); - case AArch64::ADDVA_MPPZ_PSEUDO_S: - return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_S, AArch64::ZAS0, MI, BB); - case AArch64::ADDHA_MPPZ_PSEUDO_D: - return EmitAddVectorToTile(AArch64::ADDHA_MPPZ_D, AArch64::ZAD0, MI, BB); - case AArch64::ADDVA_MPPZ_PSEUDO_D: - return EmitAddVectorToTile(AArch64::ADDVA_MPPZ_D, AArch64::ZAD0, MI, BB); } } @@ -4766,6 +4678,7 @@ case Intrinsic::aarch64_sve_clz: return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(), Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); + case Intrinsic::aarch64_sme_get_live_za_slices: case Intrinsic::aarch64_sme_cntsb: return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), DAG.getConstant(1, dl, MVT::i32)); @@ -5960,6 +5873,10 @@ /// Returns true 
if the Function has ZA state and contains at least one call to /// a function that requires setting up a lazy-save buffer. +/// Intrinsics are ignored, because in most common cases they won't be expanded +/// to function calls. In the odd case they are expanded to a function call, +/// an ad-hoc lazy-save buffer is set up when lowering the call itself, so +/// here we optimize for the common case. static bool requiresBufferForLazySave(const Function &F) { SMEAttrs CallerAttrs(F); if (!CallerAttrs.hasZAState()) @@ -5968,34 +5885,56 @@ for (const BasicBlock &BB : F) for (const Instruction &I : BB) if (const CallInst *Call = dyn_cast(&I)) - if (CallerAttrs.requiresLazySave(SMEAttrs(*Call))) + if (!isa(Call) && + CallerAttrs.requiresLazySave(SMEAttrs(*Call))) return true; return false; } -unsigned AArch64TargetLowering::allocateLazySaveBuffer( - SDValue &Chain, const SDLoc &DL, SelectionDAG &DAG, Register &Reg) const { +unsigned +AArch64TargetLowering::allocateLazySaveBuffer(SDValue &Chain, const SDLoc &DL, + SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo &MFI = MF.getFrameInfo(); + auto *FuncInfo = MF.getInfo(); // Allocate a lazy-save buffer object of size SVL.B * SVL.B (worst-case) SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, DAG.getConstant(1, DL, MVT::i32)); SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N); + + // The following is a bit of a hack, but I don't quickly see an alternative. + // When calling `allocateLazySaveBuffer` successively in a basic block for + // intrinsics that have no chain and use the same root node, SelectionDAG will + // see a 'dynamic_stackalloc' of the same size, simplify that to a 'sub' of SP + // and because all the operands are the same, it will assume it can + // reuse the same DAG node (sp value).
When the code deallocates the first + // buffer, SelectionDAG will no longer create the dynamic_stackalloc of the + // second buffer, but will still try to use it and deallocate it. To avoid + // this from happening and make the dynamic_stackalloc unique, we can change + // the size we allocate by adding some additional offset. Performance is + // likely to be terrible for these cases anyway, but at least the code now + // does the right thing. + unsigned UniqueLazySave = FuncInfo->getNumLazySaveBuffers(); + NN = DAG.getNode(ISD::ADD, DL, MVT::i64, NN, + DAG.getConstant(UniqueLazySave * 16, DL, MVT::i64)); + FuncInfo->setNumLazySaveBuffers(++UniqueLazySave); + + // Allocate the lazy-save buffer dynamically. SDValue Ops[] = {Chain, NN, DAG.getConstant(1, DL, MVT::i64)}; SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other); SDValue Buffer = DAG.getNode(ISD::DYNAMIC_STACKALLOC, DL, VTs, Ops); - unsigned FI = MFI.CreateVariableSizedObject(Align(1), nullptr); - Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i64)); - Chain = DAG.getCopyToReg(Buffer.getValue(1), DL, Reg, Buffer.getValue(0)); + Chain = Buffer.getValue(1); + MFI.CreateVariableSizedObject(Align(1), nullptr); // Allocate an additional TPIDR2 object on the stack (16 bytes) unsigned TPIDR2Obj = MFI.CreateStackObject(16, Align(16), false); // Store the buffer pointer to the TPIDR2 stack object.
- MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, FI); + MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj); SDValue Ptr = DAG.getFrameIndex( - FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); + TPIDR2Obj, + DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); Chain = DAG.getStore(Chain, DL, Buffer, Ptr, MPI); return TPIDR2Obj; @@ -6110,6 +6049,9 @@ RegVT.getVectorElementType() == MVT::i1) { FuncInfo->setIsSVECC(true); RC = &AArch64::PPRRegClass; + } else if (RegVT == MVT::aarch64svcount) { + FuncInfo->setIsSVECC(true); + RC = &AArch64::PPRRegClass; } else if (RegVT.isScalableVector()) { FuncInfo->setIsSVECC(true); RC = &AArch64::ZPRRegClass; @@ -6145,6 +6087,7 @@ break; case CCValAssign::Indirect: assert((VA.getValVT().isScalableVector() || + VA.getValVT() == MVT::aarch64svcount || Subtarget->isWindowsArm64EC()) && "Indirect arguments should be scalable on most subtargets"); break; @@ -6225,9 +6168,10 @@ } if (VA.getLocInfo() == CCValAssign::Indirect) { - assert( - (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) && - "Indirect arguments should be scalable on most subtargets"); + assert((VA.getValVT().isScalableVector() || + VA.getValVT() == MVT::aarch64svcount || + Subtarget->isWindowsArm64EC()) && + "Indirect arguments should be scalable on most subtargets"); uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize(); unsigned NumParts = 1; @@ -6302,8 +6246,8 @@ for (unsigned I=0; IsetLazySaveBufferReg(Reg); + unsigned TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG); FuncInfo->setLazySaveTPIDR2Obj(TPIDR2Obj); } @@ -6976,8 +6918,11 @@ getCalleeAttrsFromExternalFunction(CLI.Callee)) CalleeAttrs = *Attrs; + Optional TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj(); bool RequiresLazySave = CallerAttrs.requiresLazySave(CalleeAttrs); + SDValue TemporaryLazySave; + MachineFrameInfo &MFI = MF.getFrameInfo(); if (RequiresLazySave) { // Set up a lazy save mechanism by storing the 
runtime live slices @@ -6985,15 +6930,17 @@ SDValue N = DAG.getNode(AArch64ISD::RDSVL, DL, MVT::i64, DAG.getConstant(1, DL, MVT::i32)); SDValue NN = DAG.getNode(ISD::MUL, DL, MVT::i64, N, N); - unsigned TPIDR2Obj = FuncInfo->getLazySaveTPIDR2Obj(); if (!TPIDR2Obj) { - Register Reg; - TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG, Reg); + SDValue Save = + DAG.getNode(ISD::STACKSAVE, DL, {MVT::i64, MVT::Other}, Chain); + TemporaryLazySave = Save.getValue(0); + Chain = Save.getValue(1); + TPIDR2Obj = allocateLazySaveBuffer(Chain, DL, DAG); } - MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, TPIDR2Obj); - SDValue TPIDR2ObjAddr = DAG.getFrameIndex(TPIDR2Obj, + MachinePointerInfo MPI = MachinePointerInfo::getStack(MF, *TPIDR2Obj); + SDValue TPIDR2ObjAddr = DAG.getFrameIndex(*TPIDR2Obj, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); SDValue BufferPtrAddr = DAG.getNode(ISD::ADD, DL, TPIDR2ObjAddr.getValueType(), TPIDR2ObjAddr, @@ -7088,7 +7035,8 @@ Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg); break; case CCValAssign::Indirect: - bool isScalable = VA.getValVT().isScalableVector(); + bool isScalable = VA.getValVT().isScalableVector() || + VA.getValVT() == MVT::aarch64svcount; assert((isScalable || Subtarget->isWindowsArm64EC()) && "Indirect arguments should be scalable on most subtargets"); @@ -7429,7 +7377,6 @@ DAG.getConstant(0, DL, MVT::i64), DAG.getConstant(1, DL, MVT::i64)); // Conditionally restore the lazy save using a pseudo node. - unsigned FI = FuncInfo->getLazySaveTPIDR2Obj(); SDValue RegMask = DAG.getRegisterMask( TRI->SMEABISupportRoutinesCallPreservedMaskFromX0()); SDValue RestoreRoutine = DAG.getTargetExternalSymbol( @@ -7442,7 +7389,8 @@ // RESTORE_ZA pseudo. 
SDValue Glue; SDValue TPIDR2Block = DAG.getFrameIndex( - FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); + *TPIDR2Obj, + DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout())); Result = DAG.getCopyToReg(Result, DL, AArch64::X0, TPIDR2Block, Glue); Result = DAG.getNode(AArch64ISD::RESTORE_ZA, DL, MVT::Other, {Result, TPIDR2_EL0, @@ -7456,6 +7404,10 @@ ISD::INTRINSIC_VOID, DL, MVT::Other, Result, DAG.getConstant(Intrinsic::aarch64_sme_set_tpidr2, DL, MVT::i32), DAG.getConstant(0, DL, MVT::i64)); + + if (TemporaryLazySave) + Result = DAG.getNode(ISD::STACKRESTORE, DL, MVT::Other, Result, + TemporaryLazySave); } if (RequiresSMChange || RequiresLazySave) { @@ -8906,6 +8858,22 @@ SDLoc DL(Op); EVT Ty = Op.getValueType(); + if (Ty == MVT::aarch64svcount) { + TVal = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, TVal); + FVal = DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, MVT::nxv16i1, FVal); + EVT CCVT = CCVal.getValueType(); + SDValue ID = + DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo, DL, CCVT); + SDValue Zero = DAG.getConstant(0, DL, CCVT); + SDValue SplatVal = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, CCVT, CCVal, + DAG.getValueType(MVT::i1)); + SDValue SplatPred = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::nxv16i1, ID, Zero, SplatVal); + SDValue Sel = + DAG.getNode(ISD::VSELECT, DL, MVT::nxv16i1, SplatPred, TVal, FVal); + return DAG.getNode(AArch64ISD::REINTERPRET_CAST, DL, Ty, Sel); + } + if (Ty.isScalableVector()) { SDValue TruncCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, CCVal); MVT PredVT = MVT::getVectorVT(MVT::i1, Ty.getVectorElementCount()); @@ -14297,6 +14265,9 @@ return false; // FIXME: Update this method to support scalable addressing modes. 
+ if (Ty->isAArch64SvcountTy()) + return AM.HasBaseReg && !AM.BaseOffs && !AM.Scale; + if (isa(Ty)) { uint64_t VecElemNumBytes = DL.getTypeSizeInBits(cast(Ty)->getElementType()) / 8; @@ -19825,7 +19796,7 @@ if (N0.getOpcode() != ISD::SETCC) return SDValue(); - if (ResVT.isScalableVector()) + if (ResVT.isScalableVector() || ResVT == MVT::aarch64svcount) return SDValue(); // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered @@ -20858,6 +20829,26 @@ return DAG.getMergeValues( {A, DAG.getZExtOrTrunc(B, DL, MVT::i1), A.getValue(2)}, DL); } + case Intrinsic::aarch64_sme_invoke_resume_pstatesm: { + SDLoc DL(N); + const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); + SmallVector SMOps; + SMOps.push_back(/*Chain*/ N->getOperand(0)); + SMOps.push_back( + DAG.getTargetConstant((int32_t)(AArch64SVCR::SVCRSM), DL, MVT::i32)); + assert(N->getOperand(0) == DAG.getEntryNode() && "Unexpected Chain value"); + auto *ConstOp = dyn_cast(N->getOperand(2)); + if (ConstOp && ConstOp->getZExtValue() == 1) + SMOps.push_back(DAG.getConstant(1, DL, MVT::i64)); + else + SMOps.push_back(/*PState.SM on entry*/ N->getOperand(2).getValue(0)); + SMOps.push_back(DAG.getTargetConstant(0, DL, MVT::i64)); + SMOps.push_back( + DAG.getRegisterMask(TRI->getSMStartStopCallPreservedMask())); + SMOps.push_back(/*Glue*/N->getOperand(0).getValue(1)); + return DAG.getNode(AArch64ISD::SMSTART, DL, + DAG.getVTList(MVT::Other, MVT::Glue), SMOps); + } default: break; } @@ -21904,18 +21895,31 @@ } bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { - if (isa(Inst.getType())) + auto IsScalable = [](const Type *T) { + return isa(T) || T->isAArch64SvcountTy(); + }; + + if (IsScalable(Inst.getType())) return true; for (unsigned i = 0; i < Inst.getNumOperands(); ++i) - if (isa(Inst.getOperand(i)->getType())) + if (IsScalable(Inst.getOperand(i)->getType())) return true; if (const AllocaInst *AI = dyn_cast(&Inst)) { - if (isa(AI->getAllocatedType())) + if 
(IsScalable(AI->getAllocatedType())) return true; } + // Checks to allow the use of SME instructions + if (auto *Base = dyn_cast(&Inst)) { + auto CallerAttrs = SMEAttrs(*Inst.getFunction()); + auto CalleeAttrs = SMEAttrs(*Base); + if (CallerAttrs.requiresSMChange(CalleeAttrs, + /*BodyOverridesInterface=*/false) || + CallerAttrs.requiresLazySave(CalleeAttrs)) + return true; + } return false; } diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -45,6 +45,17 @@ def FalseLanesZero : FalseLanesEnum<1>; def FalseLanesUndef : FalseLanesEnum<2>; +class SMEMatrixTypeEnum val> { + bits<3> Value = val; +} +def SMEMatrixNone : SMEMatrixTypeEnum<0>; +def SMEMatrixTileB : SMEMatrixTypeEnum<1>; +def SMEMatrixTileH : SMEMatrixTypeEnum<2>; +def SMEMatrixTileS : SMEMatrixTypeEnum<3>; +def SMEMatrixTileD : SMEMatrixTypeEnum<4>; +def SMEMatrixTileQ : SMEMatrixTypeEnum<5>; +def SMEMatrixArray : SMEMatrixTypeEnum<6>; + // AArch64 Instruction Format class AArch64Inst : Instruction { field bits<32> Inst; // Instruction encoding. 
@@ -65,16 +76,18 @@ bit isPTestLike = 0; FalseLanesEnum FalseLanes = FalseLanesNone; DestructiveInstTypeEnum DestructiveInstType = NotDestructive; + SMEMatrixTypeEnum SMEMatrixType = SMEMatrixNone; ElementSizeEnum ElementSize = ElementSizeNone; - let TSFlags{10} = isPTestLike; - let TSFlags{9} = isWhile; - let TSFlags{8-7} = FalseLanes.Value; - let TSFlags{6-3} = DestructiveInstType.Value; - let TSFlags{2-0} = ElementSize.Value; + let TSFlags{13-11} = SMEMatrixType.Value; + let TSFlags{10} = isPTestLike; + let TSFlags{9} = isWhile; + let TSFlags{8-7} = FalseLanes.Value; + let TSFlags{6-3} = DestructiveInstType.Value; + let TSFlags{2-0} = ElementSize.Value; - let Pattern = []; - let Constraints = cstr; + let Pattern = []; + let Constraints = cstr; } class InstSubst @@ -941,6 +954,13 @@ let ParserMatchClass = Imm0_1Operand; } +// timm32_0_1 predicate - True if the 32-bit immediate is in the range [0,1] +def timm32_0_1 : Operand, TImmLeaf { + let ParserMatchClass = Imm0_1Operand; +} + // imm0_15 predicate - True if the immediate is in the range [0,15] def imm0_15 : Operand, ImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_3Operand; +} + // timm32_0_7 predicate - True if the 32-bit immediate is in the range [0,7] def timm32_0_7 : Operand, TImmLeaf, TImmLeaf { + let ParserMatchClass = Imm0_15Operand; +} + +// timm32_0_31 predicate - True if the 32-bit immediate is in the range [0,31] +def timm32_0_31 : Operand, TImmLeaf { + let ParserMatchClass = Imm0_31Operand; +} + +// timm32_0_255 predicate - True if the 32-bit immediate is in the range [0,255] +def timm32_0_255 : Operand, TImmLeaf { + let ParserMatchClass = Imm0_255Operand; +} + // An arithmetic shifter operand: // {7-6} - shift type: 00 = lsl, 01 = lsr, 10 = asr // {5-0} - imm6 @@ -1340,39 +1388,51 @@ : VectorIndex; -def sme_elm_idx0_0 : Operand, ImmLeaf, TImmLeaf { let ParserMatchClass = Imm0_0Operand; let PrintMethod = "printMatrixIndex"; let OperandNamespace = "AArch64"; let OperandType = 
"OPERAND_IMPLICIT_IMM_0"; } -def sme_elm_idx0_1 : Operand, ImmLeaf, TImmLeaf { let ParserMatchClass = Imm0_1Operand; let PrintMethod = "printMatrixIndex"; } -def sme_elm_idx0_3 : Operand, ImmLeaf, TImmLeaf { let ParserMatchClass = Imm0_3Operand; let PrintMethod = "printMatrixIndex"; } -def sme_elm_idx0_7 : Operand, ImmLeaf, TImmLeaf { let ParserMatchClass = Imm0_7Operand; let PrintMethod = "printMatrixIndex"; } -def sme_elm_idx0_15 : Operand, ImmLeaf, TImmLeaf { let ParserMatchClass = Imm0_15Operand; let PrintMethod = "printMatrixIndex"; } +// SME2 vector select offset operands + +// uimm3s8 predicate +// True if the immediate is a multiple of 8 in the range [0,56]. +def UImm3s8Operand : UImmScaledMemoryIndexed<3, 8>; + +def uimm3s8 : Operand, ImmLeaf= 0 && Imm <= 56 && ((Imm % 8) == 0); }], UImmS8XForm> { + let PrintMethod = "printImmScale<8, false>"; + let ParserMatchClass = UImm3s8Operand; +} + class UImmScaledMemoryIndexedRange : AsmOperandClass { let Name = "UImm" # Width # "s" # Scale # "Range"; let DiagnosticType = "InvalidMemoryIndexedRange" # Scale # "UImm" # Width; @@ -1381,9 +1441,52 @@ let ParserMethod = "tryParseImmRange"; } +// Implicit immediates ranges 0:1 and 0:3, scale has no meaning +// since the immediate is zero +def UImm0s2RangeOperand : UImmScaledMemoryIndexedRange<0, 2, 1>; +def UImm0s4RangeOperand : UImmScaledMemoryIndexedRange<0, 4, 3>; + +def UImm1s2RangeOperand : UImmScaledMemoryIndexedRange<1, 2, 1>; +def UImm1s4RangeOperand : UImmScaledMemoryIndexedRange<1, 4, 3>; +def UImm2s4RangeOperand : UImmScaledMemoryIndexedRange<2, 4, 3>; def UImm2s2RangeOperand : UImmScaledMemoryIndexedRange<2, 2, 1>; def UImm3s2RangeOperand : UImmScaledMemoryIndexedRange<3, 2, 1>; + +def uimm0s2range : Operand, ImmLeaf { + let PrintMethod = "printImmRangeScale<2, 1>"; + let ParserMatchClass = UImm0s2RangeOperand; + let OperandNamespace = "AArch64"; + let OperandType = "OPERAND_IMPLICIT_IMM_0"; +} + +def uimm0s4range : Operand, ImmLeaf { + let PrintMethod = 
"printImmRangeScale<4, 3>"; + let ParserMatchClass = UImm0s4RangeOperand; + let OperandNamespace = "AArch64"; + let OperandType = "OPERAND_IMPLICIT_IMM_0"; +} + +def uimm1s2range : Operand, ImmLeaf= 0 && Imm <= 2 && ((Imm % 2) == 0); }], UImmS2XForm> { + let PrintMethod = "printImmRangeScale<2, 1>"; + let ParserMatchClass = UImm1s2RangeOperand; +} + +def uimm2s4range : Operand, ImmLeaf= 0 && Imm <= 12 && ((Imm % 4) == 0); }], UImmS4XForm> { + let PrintMethod = "printImmRangeScale<4, 3>"; + let ParserMatchClass = UImm2s4RangeOperand; +} + +def uimm1s4range : Operand, ImmLeaf= 0 && Imm <= 4 && ((Imm % 4) == 0); }], UImmS4XForm> { + let PrintMethod = "printImmRangeScale<4, 3>"; + let ParserMatchClass = UImm1s4RangeOperand; +} + def uimm2s2range : Operand, ImmLeaf= 0 && Imm <= 6 && ((Imm % 2) == 0); }], UImmS2XForm> { let PrintMethod = "printImmRangeScale<2, 1>"; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -502,10 +502,11 @@ unsigned getBLRCallOpcode(const MachineFunction &MF); // struct TSFlags { -#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits -#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits -#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits -#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits +#define TSFLAG_ELEMENT_SIZE_TYPE(X) (X) // 3-bits +#define TSFLAG_DESTRUCTIVE_INST_TYPE(X) ((X) << 3) // 4-bits +#define TSFLAG_FALSE_LANE_TYPE(X) ((X) << 7) // 2-bits +#define TSFLAG_INSTR_FLAGS(X) ((X) << 9) // 2-bits +#define TSFLAG_SME_MATRIX_TYPE(X) ((X) << 11) // 3-bits // } namespace AArch64 { @@ -543,14 +544,28 @@ static const uint64_t InstrFlagIsWhile = TSFLAG_INSTR_FLAGS(0x1); static const uint64_t InstrFlagIsPTestLike = TSFLAG_INSTR_FLAGS(0x2); +enum SMEMatrixType { + SMEMatrixTypeMask = TSFLAG_SME_MATRIX_TYPE(0x7), + SMEMatrixNone = TSFLAG_SME_MATRIX_TYPE(0x0), + SMEMatrixTileB = 
TSFLAG_SME_MATRIX_TYPE(0x1), + SMEMatrixTileH = TSFLAG_SME_MATRIX_TYPE(0x2), + SMEMatrixTileS = TSFLAG_SME_MATRIX_TYPE(0x3), + SMEMatrixTileD = TSFLAG_SME_MATRIX_TYPE(0x4), + SMEMatrixTileQ = TSFLAG_SME_MATRIX_TYPE(0x5), + SMEMatrixArray = TSFLAG_SME_MATRIX_TYPE(0x6), +}; + #undef TSFLAG_ELEMENT_SIZE_TYPE #undef TSFLAG_DESTRUCTIVE_INST_TYPE #undef TSFLAG_FALSE_LANE_TYPE #undef TSFLAG_INSTR_FLAGS +#undef TSFLAG_SME_MATRIX_TYPE int getSVEPseudoMap(uint16_t Opcode); int getSVERevInstr(uint16_t Opcode); int getSVENonRevInstr(uint16_t Opcode); + +int getSMEPseudoMap(uint16_t Opcode); } } // end namespace llvm diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -184,13 +184,11 @@ /// or return type bool IsSVECC = false; - /// The virtual register that is the pointer to the lazy save buffer. - /// This value is used during ISelLowering. - Register LazySaveBufferReg = 0; - /// The frame-index for the TPIDR2 object used for lazy saves. - Register LazySaveTPIDR2Obj = 0; + Optional LazySaveTPIDR2Obj = None; + /// The number of lazy save buffers set up in this function. + unsigned NumLazySaveBuffers = 0; /// True if the function need unwind information. 
mutable Optional NeedsDwarfUnwindInfo; @@ -209,11 +207,11 @@ bool isSVECC() const { return IsSVECC; }; void setIsSVECC(bool s) { IsSVECC = s; }; - unsigned getLazySaveBufferReg() const { return LazySaveBufferReg; } - void setLazySaveBufferReg(unsigned Reg) { LazySaveBufferReg = Reg; } + Optional getLazySaveTPIDR2Obj() const { return LazySaveTPIDR2Obj; } + void setLazySaveTPIDR2Obj(unsigned Obj) { LazySaveTPIDR2Obj = Obj; } - unsigned getLazySaveTPIDR2Obj() const { return LazySaveTPIDR2Obj; } - void setLazySaveTPIDR2Obj(unsigned Reg) { LazySaveTPIDR2Obj = Reg; } + unsigned getNumLazySaveBuffers() const { return NumLazySaveBuffers; } + void setNumLazySaveBuffers(unsigned Num) { NumLazySaveBuffers = Num; } void initializeBaseYamlFields(const yaml::AArch64FunctionInfo &YamlMFI); diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td --- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -871,15 +871,16 @@ //****************************************************************************** // SVE predicate register classes. -class PPRClass : RegisterClass< +class PPRClass : RegisterClass< "AArch64", - [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1 ], 16, - (sequence "P%u", 0, lastreg)> { + [ nxv16i1, nxv8i1, nxv4i1, nxv2i1, nxv1i1, aarch64svcount ], 16, + (sequence "P%u", firstreg, lastreg)> { let Size = 16; } -def PPR : PPRClass<15>; -def PPR_3b : PPRClass<7>; // Restricted 3 bit SVE predicate register class. +def PPR : PPRClass<0, 15>; +def PPR_3b : PPRClass<0, 7>; // Restricted 3 bit SVE predicate register class. 
+def PPR_3b_p8_p15 : PPRClass<8, 15>; class PPRAsmOperand : AsmOperandClass { let Name = "SVE" # name # "Reg"; @@ -906,6 +907,38 @@ def PPR3bAny : PPRRegOp<"", PPRAsmOp3bAny, ElementSizeNone, PPR_3b>; + +// SME predicate-as-counter operand +class PNRAsmOperand + : PPRAsmOperand { + let PredicateMethod = "isSMEPredicateAsCounterRegOfWidth<" + # Width # ", " # "AArch64::" + # RegClass # "RegClassID>"; + let DiagnosticType = "InvalidSME" # name # "Reg"; + let ParserMethod = "tryParseSMEPredicateAsCounter"; +} + +class PNRP8_15RegOp + : PPRRegOp { + let PrintMethod = "printPredicateAsCounter<" # EltSize # ">"; + let EncoderMethod = "EncodePPR_3b_p8_p15"; + let DecoderMethod = "DecodePPR_3b_p8_p15RegisterClass"; +} + +def PNRAsmAny_p8_p15 : PNRAsmOperand<"PNPredicateAny_p8_p15", "PPR_3b_p8_p15", 0>; +def PNRAsmOp8_p8_p15 : PNRAsmOperand<"PNPredicateB_p8_p15", "PPR_3b_p8_p15", 8>; +def PNRAsmOp16_p8_p15 : PNRAsmOperand<"PNPredicateH_p8_p15", "PPR_3b_p8_p15", 16>; +def PNRAsmOp32_p8_p15 : PNRAsmOperand<"PNPredicateS_p8_p15", "PPR_3b_p8_p15", 32>; +def PNRAsmOp64_p8_p15 : PNRAsmOperand<"PNPredicateD_p8_p15", "PPR_3b_p8_p15", 64>; + +def PNRAny_p8_p15 : PNRP8_15RegOp<"", PNRAsmAny_p8_p15, 0, PPR_3b_p8_p15>; +def PNR8_p8_p15 : PNRP8_15RegOp<"b", PNRAsmOp8_p8_p15, 8, PPR_3b_p8_p15>; +def PNR16_p8_p15 : PNRP8_15RegOp<"h", PNRAsmOp16_p8_p15, 16, PPR_3b_p8_p15>; +def PNR32_p8_p15 : PNRP8_15RegOp<"s", PNRAsmOp32_p8_p15, 32, PPR_3b_p8_p15>; +def PNR64_p8_p15 : PNRP8_15RegOp<"d", PNRAsmOp64_p8_p15, 64, PPR_3b_p8_p15>; + + + //****************************************************************************** // SVE vector register classes @@ -1112,6 +1145,10 @@ def ZZ_d_mul_r : RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 2>; } + + def ZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 2>; + } } // end let EncoderMethod/DecoderMethod let EncoderMethod = "EncodeRegAsMultipleOf<4>", @@ -1131,8 +1168,114 @@ def ZZZZ_d_mul_r : 
RegisterOperand"> { let ParserMatchClass = ZPRVectorListMul<64, 4>; } + + def ZZZZ_q_mul_r : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListMul<128, 4>; + } } // end let EncoderMethod/DecoderMethod +// SME2 strided multi-vector operands + +// ZStridedPairs +// +// A group of two Z vectors with strided numbering consisting of: +// Zn+0.T and Zn+8.T +// where n is in the range 0 to 7 and 16 to 23 inclusive, and T is one of B, H, +// S, or D. + +// Z0_Z8, Z1_Z9, Z2_Z10, Z3_Z11, Z4_Z12, Z5_Z13, Z6_Z14, Z7_Z15 +def ZStridedPairsLo : RegisterTuples<[zsub0, zsub1], [ + (trunc (rotl ZPR, 0), 8), (trunc (rotl ZPR, 8), 8) +]>; + +// Z16_Z24, Z17_Z25, Z18_Z26, Z19_Z27, Z20_Z28, Z21_Z29, Z22_Z30, Z23_Z31 +def ZStridedPairsHi : RegisterTuples<[zsub0, zsub1], [ + (trunc (rotl ZPR, 16), 8), (trunc (rotl ZPR, 24), 8) +]>; + +// ZStridedQuads +// +// A group of four Z vectors with strided numbering consisting of: +// Zn+0.T, Zn+4.T, Zn+8.T and Zn+12.T +// where n is in the range 0 to 3 and 16 to 19 inclusive, and T is one of B, H, +// S, or D. 
+ +// Z0_Z4_Z8_Z12, Z1_Z5_Z9_Z13, Z2_Z6_Z10_Z14, Z3_Z7_Z11_Z15 +def ZStridedQuadsLo : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [ + (trunc (rotl ZPR, 0), 4), (trunc (rotl ZPR, 4), 4), + (trunc (rotl ZPR, 8), 4), (trunc (rotl ZPR, 12), 4) +]>; +// Z16_Z20_Z24_Z28, Z17_Z21_Z25_Z29, Z18_Z22_Z26_Z30, Z19_Z23_Z27_Z31 +def ZStridedQuadsHi : RegisterTuples<[zsub0, zsub1, zsub2, zsub3], [ + (trunc (rotl ZPR, 16), 4), (trunc (rotl ZPR, 20), 4), + (trunc (rotl ZPR, 24), 4), (trunc (rotl ZPR, 28), 4) +]>; + +def ZPR2Strided : RegisterClass<"AArch64", [untyped], 256, + (add ZStridedPairsLo, ZStridedPairsHi)> { + let Size = 256; +} +def ZPR4Strided : RegisterClass<"AArch64", [untyped], 512, + (add ZStridedQuadsLo, ZStridedQuadsHi)> { + let Size = 512; +} + + +class ZPRVectorListStrided + : ZPRVectorList { + let Name = "SVEVectorListStrided" # NumRegs # "x" # ElementWidth; + let DiagnosticType = "Invalid" # Name; + let PredicateMethod = "isTypedVectorListStrided"; + let RenderMethod = "addStridedVectorListOperands<" # NumRegs # ">"; +} + +let EncoderMethod = "EncodeZPR2StridedRegisterClass", + DecoderMethod = "DecodeZPR2StridedRegisterClass" in { + def ZZ_b_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<8, 2, 8>; + } + + def ZZ_h_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<16, 2, 8>; + } + + def ZZ_s_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<32, 2, 8>; + } + + def ZZ_d_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<64, 2, 8>; + } +} + +let EncoderMethod = "EncodeZPR4StridedRegisterClass", + DecoderMethod = "DecodeZPR4StridedRegisterClass" in { + def ZZZZ_b_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<8, 4, 4>; + } + + def ZZZZ_h_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<16, 4, 4>; + } + + def ZZZZ_s_strided + : RegisterOperand"> { + let ParserMatchClass = 
ZPRVectorListStrided<32, 4, 4>; + } + + def ZZZZ_d_strided + : RegisterOperand"> { + let ParserMatchClass = ZPRVectorListStrided<64, 4, 4>; + } +} + class ZPRExtendAsmOperand : AsmOperandClass { let Name = "ZPRExtend" # ShiftExtend # RegWidth # Scale @@ -1269,6 +1412,8 @@ def ZA : AArch64Reg<0, "za", [ZAB0]>; } +def ZT0 : AArch64Reg<0, "zt0">; + // SME Register Classes let isAllocatable = 0 in { @@ -1295,6 +1440,11 @@ } } +def ZT0R : RegisterClass<"AArch64", [untyped], 512, (add ZT0)> { + let Size = 512; + let DiagnosticType = "InvalidZT0"; +} + // SME Register Operands // There are three types of SME matrix register operands: // * Tiles: @@ -1407,6 +1557,8 @@ def MatrixOp : MatrixOperand; // SME2 register operands and classes +def MatrixOp8 : MatrixOperand; +def MatrixOp16 : MatrixOperand; def MatrixOp32 : MatrixOperand; def MatrixOp64 : MatrixOperand; diff --git a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td @@ -37,15 +37,15 @@ def ADDSPL_XXI : sve_int_arith_vl<0b1, "addspl", /*streaming_sve=*/0b1>; def ADDSVL_XXI : sve_int_arith_vl<0b0, "addsvl", /*streaming_sve=*/0b1>; -def ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha">; -def ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva">; +defm ADDHA_MPPZ_S : sme_add_vector_to_tile_u32<0b0, "addha", int_aarch64_sme_addha>; +defm ADDVA_MPPZ_S : sme_add_vector_to_tile_u32<0b1, "addva", int_aarch64_sme_addva>; def : Pat<(AArch64rdsvl (i32 simm6_32b:$imm)), (RDSVLI_XI simm6_32b:$imm)>; } let Predicates = [HasSMEI16I64] in { -def ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha">; -def ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva">; +defm ADDHA_MPPZ_D : sme_add_vector_to_tile_u64<0b0, "addha", int_aarch64_sme_addha>; +defm ADDVA_MPPZ_D : sme_add_vector_to_tile_u64<0b1, "addva", int_aarch64_sme_addva>; } let Predicates = [HasSME] in { @@ -235,29 
+235,28 @@ // SME2 Instructions //===----------------------------------------------------------------------===// let Predicates = [HasSME2] in { -defm ADD_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"add", 0b10>; -defm ADD_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"add", 0b10>; -defm ADD_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"add", 0b10>; -defm ADD_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"add", 0b10>; - -defm SUB_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"sub", 0b11>; -defm SUB_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"sub", 0b11>; -defm SUB_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"sub", 0b11>; -defm SUB_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"sub", 0b11>; - -defm FMLA_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmla", 0b00>; -defm FMLA_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmla", 0b00>; -defm FMLA_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"fmla", 0b00>; -defm FMLA_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"fmla", 0b00>; - -defm FMLS_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmls", 0b01>; -defm FMLS_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmls", 0b01>; -defm FMLS_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"fmls", 0b01>; -defm FMLS_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"fmls", 0b01>; - -defm ADD_VG2_2ZZ : sme2_sqdmulh_add_vector_vg2_single<"add", 0b011000>; -defm ADD_VG4_4ZZ : sme2_sqdmulh_add_vector_vg4_single<"add", 0b011000>; - +defm ADD_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"add", 0b10, null_frag>; +defm ADD_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"add", 0b10, null_frag>; +defm ADD_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"add", 0b10, null_frag>; +defm ADD_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"add", 0b10, null_frag>; + +defm ADD_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"add", 0b0110000>; +defm ADD_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"add", 0b0110000>; + +defm 
SUB_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"sub", 0b11, null_frag>; +defm SUB_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"sub", 0b11, null_frag>; +defm SUB_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"sub", 0b11, null_frag>; +defm SUB_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"sub", 0b11, null_frag>; + +defm FMLA_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmla", 0b00, int_aarch64_sme_fmla_single_vg1x2>; +defm FMLA_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmla", 0b00, int_aarch64_sme_fmla_single_vg1x4>; +defm FMLA_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"fmla", 0b00, int_aarch64_sme_fmla_multi_vg1x2>; +defm FMLA_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"fmla", 0b00, int_aarch64_sme_fmla_multi_vg1x4>; + +defm FMLS_VG2_M2ZZ_S : sme2_mla_add_sub_array_vg2_single_S<"fmls", 0b01, int_aarch64_sme_fmls_single_vg1x2>; +defm FMLS_VG4_M4ZZ_S : sme2_mla_add_sub_array_vg4_single_S<"fmls", 0b01, int_aarch64_sme_fmls_single_vg1x4>; +defm FMLS_VG2_M2Z2Z_S : sme2_mla_add_sub_array_vg2_multi_S<"fmls", 0b01, int_aarch64_sme_fmls_multi_vg1x2>; +defm FMLS_VG4_M4Z4Z_S : sme2_mla_add_sub_array_vg4_multi_S<"fmls", 0b01, int_aarch64_sme_fmls_multi_vg1x4>; defm ADDA_VG2_M2Z2Z_S : sme2_multivec_accum_add_sub_vg2_S<"add", 0b10>; defm ADDA_VG4_M4Z4Z_S : sme2_multivec_accum_add_sub_vg4_S<"add", 0b10>; @@ -270,82 +269,134 @@ defm FSUB_VG2_M2Z2Z_S : sme2_multivec_accum_add_sub_vg2_S<"fsub", 0b01>; defm FSUB_VG4_M4Z4Z_S : sme2_multivec_accum_add_sub_vg4_S<"fsub", 0b01>; -defm SQDMULH_2ZZ : sme2_sqdmulh_add_vector_vg2_single<"sqdmulh", 0b100000>; -defm SQDMULH_4ZZ : sme2_sqdmulh_add_vector_vg4_single<"sqdmulh", 0b100000>; -defm SQDMULH_2Z2Z2Z : sme2_sqdmulh_vector_vg2_multi<"sqdmulh">; -defm SQDMULH_4Z4Z4Z : sme2_sqdmulh_vector_vg4_multi<"sqdmulh">; - -defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00>; -defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00>; -defm FMLAL_VG4_M4ZZI : 
sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00>; -defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00>; -defm FMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b00>; -defm FMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b00>; -defm FMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b00>; -defm FMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b00>; - -defm FMLSL_MZZI : sme2_mla_long_array_index<"fmlsl", 0b10, 0b01>; -defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01>; -defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01>; -defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01>; -defm FMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b01>; -defm FMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b01>; -defm FMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b01>; -defm FMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b01>; - -defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10>; -defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10>; -defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10>; -defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10>; -defm BFMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b10>; -defm BFMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b10>; -defm BFMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b10>; -defm BFMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b10>; - -defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11>; -defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11>; -defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11>; -defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11>; -defm BFMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b11>; -defm BFMLSL_VG4_M4ZZ : 
sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b11>; -defm BFMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b11>; -defm BFMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b11>; - -defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00>; -defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00>; -defm SMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlal", 0b00>; -defm SMLAL_MZZ : sme2_mla_long_array_single<"smlal",0b01, 0b00>; -defm SMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"smlal", 0b00>; -defm SMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlal", 0b00>; -defm SMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlal", 0b00>; -defm SMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlal", 0b00>; - -defm SMLSL_MZZI : sme2_mla_long_array_index<"smlsl", 0b11, 0b01>; -defm SMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlsl", 0b01>; -defm SMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlsl", 0b01>; -defm SMLSL_MZZ : sme2_mla_long_array_single<"smlsl",0b01, 0b01>; -defm SMLSL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"smlsl", 0b01>; -defm SMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlsl", 0b01>; -defm SMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlsl", 0b01>; -defm SMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlsl", 0b01>; - -defm UMLAL_MZZI : sme2_mla_long_array_index<"umlal", 0b11, 0b10>; -defm UMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlal", 0b10>; -defm UMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlal", 0b10>; -defm UMLAL_MZZ : sme2_mla_long_array_single<"umlal",0b01, 0b10>; -defm UMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlal", 0b10>; -defm UMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlal", 0b10>; -defm UMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlal", 0b10>; -defm UMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlal", 0b10>; - -defm UMLSL_MZZI : sme2_mla_long_array_index<"umlsl", 0b11, 
0b11>; -defm UMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlsl", 0b11>; -defm UMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlsl", 0b11>; -defm UMLSL_MZZ : sme2_mla_long_array_single<"umlsl",0b01, 0b11>; -defm UMLSL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlsl", 0b11>; -defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11>; -defm UMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11>; -defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11>; +defm SQDMULH_2Z2ZZ : sme2_int_sve_destructive_vector_vg2_single<"sqdmulh", 0b1000000>; +defm SQDMULH_4Z4ZZ : sme2_int_sve_destructive_vector_vg4_single<"sqdmulh", 0b1000000>; +defm SQDMULH_2Z2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"sqdmulh", 0b100000>; +defm SQDMULH_4Z4Z4Z : sme2_int_sve_destructive_vector_vg4_multi<"sqdmulh", 0b100000>; + +defm FMLAL_MZZI : sme2_mla_long_array_index<"fmlal", 0b10, 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x1>; +defm FMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x2>; +defm FMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_lane_vg2x4>; +defm FMLAL_MZZ : sme2_mla_long_array_single<"fmlal", 0b00, 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x1>; +defm FMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x2>; +defm FMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_single_vg2x4>; +defm FMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_multi_vg2x2>; +defm FMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlal", 0b00, nxv8f16, int_aarch64_sme_fmlal_multi_vg2x4>; + +defm FMLSL_MZZI : sme2_mla_long_array_index<"fmlsl", 0b10, 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x1>; +defm FMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"fmlsl", 0b01, nxv8f16, 
int_aarch64_sme_fmlsl_lane_vg2x2>; +defm FMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_lane_vg2x4>; +defm FMLSL_MZZ : sme2_mla_long_array_single<"fmlsl", 0b00, 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x1>; +defm FMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm FMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm FMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_multi_vg2x2>; +defm FMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"fmlsl", 0b01, nxv8f16, int_aarch64_sme_fmlsl_multi_vg2x4>; + +defm BFMLAL_MZZI : sme2_mla_long_array_index<"bfmlal", 0b10, 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x1>; +defm BFMLAL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x2>; +defm BFMLAL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_lane_vg2x4>; +defm BFMLAL_MZZ : sme2_mla_long_array_single<"bfmlal", 0b00, 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x1>; +defm BFMLAL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x2>; +defm BFMLAL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_single_vg2x4>; +defm BFMLAL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_multi_vg2x2>; +defm BFMLAL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlal", 0b10, nxv8bf16, int_aarch64_sme_fmlal_multi_vg2x4>; + +defm BFMLSL_MZZI : sme2_mla_long_array_index<"bfmlsl", 0b10, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x1>; +defm BFMLSL_VG2_M2ZZI : sme2_fp_mla_long_array_vg2_index<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x2>; +defm BFMLSL_VG4_M4ZZI : sme2_fp_mla_long_array_vg4_index<"bfmlsl", 0b11, 
nxv8bf16, int_aarch64_sme_fmlsl_lane_vg2x4>; +defm BFMLSL_MZZ : sme2_mla_long_array_single<"bfmlsl", 0b00, 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x1>; +defm BFMLSL_VG2_M2ZZ : sme2_fp_mla_long_array_vg2_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x2>; +defm BFMLSL_VG4_M4ZZ : sme2_fp_mla_long_array_vg4_single<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_single_vg2x4>; +defm BFMLSL_VG2_M2Z2Z : sme2_fp_mla_long_array_vg2_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_multi_vg2x2>; +defm BFMLSL_VG4_M4Z4Z : sme2_fp_mla_long_array_vg4_multi<"bfmlsl", 0b11, nxv8bf16, int_aarch64_sme_fmlsl_multi_vg2x4>; + +defm SMLAL_MZZI : sme2_mla_long_array_index<"smlal", 0b11, 0b00, nxv8i16, int_aarch64_sme_smlal_lane_vg2x1>; +defm SMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x2>; +defm SMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlal", 0b00, int_aarch64_sme_smlal_lane_vg2x4>; +defm SMLAL_MZZ : sme2_mla_long_array_single<"smlal",0b01, 0b00, nxv8i16, int_aarch64_sme_smlal_single_vg2x1>; +defm SMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x2>; +defm SMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlal", 0b00, int_aarch64_sme_smlal_single_vg2x4>; +defm SMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlal", 0b00, int_aarch64_sme_smlal_multi_vg2x2>; +defm SMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlal", 0b00, int_aarch64_sme_smlal_multi_vg2x4>; + +defm SMLSL_MZZI : sme2_mla_long_array_index<"smlsl", 0b11, 0b01, nxv8i16, int_aarch64_sme_smlsl_lane_vg2x1>; +defm SMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"smlsl", 0b01, int_aarch64_sme_smlsl_lane_vg2x2>; +defm SMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"smlsl", 0b01, int_aarch64_sme_smlsl_lane_vg2x4>; +defm SMLSL_MZZ : sme2_mla_long_array_single<"smlsl",0b01, 0b01, nxv8i16, int_aarch64_sme_smlsl_single_vg2x1>; +defm SMLSL_VG2_M2ZZ : 
sme2_int_mla_long_array_vg2_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x2>; +defm SMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"smlsl", 0b01, int_aarch64_sme_smlsl_single_vg2x4>; +defm SMLSL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"smlsl", 0b01, int_aarch64_sme_smlsl_multi_vg2x2>; +defm SMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"smlsl", 0b01, int_aarch64_sme_smlsl_multi_vg2x4>; + +defm UMLAL_MZZI : sme2_mla_long_array_index<"umlal", 0b11, 0b10, nxv8i16, int_aarch64_sme_umlal_lane_vg2x1>; +defm UMLAL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlal", 0b10, int_aarch64_sme_umlal_lane_vg2x2>; +defm UMLAL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlal", 0b10, int_aarch64_sme_umlal_lane_vg2x4>; +defm UMLAL_MZZ : sme2_mla_long_array_single<"umlal",0b01, 0b10, nxv8i16, int_aarch64_sme_umlal_single_vg2x1>; +defm UMLAL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x2>; +defm UMLAL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlal", 0b10, int_aarch64_sme_umlal_single_vg2x4>; +defm UMLAL_VG2_M2Z2Z : sme2_int_mla_long_array_vg2_multi<"umlal", 0b10, int_aarch64_sme_umlal_multi_vg2x2>; +defm UMLAL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlal", 0b10, int_aarch64_sme_umlal_multi_vg2x4>; + +defm UMLSL_MZZI : sme2_mla_long_array_index<"umlsl", 0b11, 0b11, nxv8i16, int_aarch64_sme_umlsl_lane_vg2x1>; +defm UMLSL_VG2_M2ZZI : sme2_int_mla_long_array_vg2_index<"umlsl", 0b11, int_aarch64_sme_umlsl_lane_vg2x2>; +defm UMLSL_VG4_M4ZZI : sme2_int_mla_long_array_vg4_index<"umlsl", 0b11, int_aarch64_sme_umlsl_lane_vg2x4>; +defm UMLSL_MZZ : sme2_mla_long_array_single<"umlsl",0b01, 0b11, nxv8i16 ,int_aarch64_sme_umlsl_single_vg2x1>; +defm UMLSL_VG2_M2ZZ : sme2_int_mla_long_array_vg2_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x2>; +defm UMLSL_VG4_M4ZZ : sme2_int_mla_long_array_vg4_single<"umlsl", 0b11, int_aarch64_sme_umlsl_single_vg2x4>; +defm UMLSL_VG2_M2Z2Z : 
sme2_int_mla_long_array_vg2_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_multi_vg2x2>; +defm UMLSL_VG4_M4Z4Z : sme2_int_mla_long_array_vg4_multi<"umlsl", 0b11, int_aarch64_sme_umlsl_multi_vg2x4>; + +def SMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlall", 0b000>; +defm SMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlall", 0b000>; +defm SMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlall", 0b000>; +defm SMLALL_MZZ_BtoS : sme2_mla_ll_array_single_32b<"smlall", 0b000>; +defm SMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single_32b<"smlall", 0b000>; +defm SMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single_32b<"smlall", 0b000>; +defm SMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi_32b<"smlall", 0b000>; +defm SMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi_32b<"smlall", 0b000>; + +def USMLALL_MZZI : sme2_mla_ll_array_index_32b<"usmlall", 0b001>; +defm USMLALL_VG2_M2ZZI : sme2_mla_ll_array_vg2_index_32b<"usmlall", 0b100>; +defm USMLALL_VG4_M4ZZI : sme2_mla_ll_array_vg4_index_32b<"usmlall", 0b100>; +defm USMLALL_MZZ : sme2_mla_ll_array_single_32b<"usmlall", 0b001>; +defm USMLALL_VG2_M2ZZ : sme2_mla_ll_array_vg2_single_32b<"usmlall", 0b001>; +defm USMLALL_VG4_M4ZZ : sme2_mla_ll_array_vg4_single_32b<"usmlall", 0b001>; +defm USMLALL_VG2_M2Z2Z : sme2_mla_ll_array_vg2_multi_32b<"usmlall", 0b001>; +defm USMLALL_VG4_M4Z4Z : sme2_mla_ll_array_vg4_multi_32b<"usmlall", 0b001>; + +def SMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"smlsll", 0b010>; +defm SMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"smlsll", 0b001>; +defm SMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"smlsll", 0b001>; +defm SMLSLL_MZZ_BtoS : sme2_mla_ll_array_single_32b<"smlsll", 0b010>; +defm SMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single_32b<"smlsll", 0b010>; +defm SMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single_32b<"smlsll", 0b010>; +defm SMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi_32b<"smlsll", 0b010>; +defm SMLSLL_VG4_M4Z4Z_BtoS : 
sme2_mla_ll_array_vg4_multi_32b<"smlsll", 0b010>; + +def UMLALL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlall", 0b100>; +defm UMLALL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlall", 0b010>; +defm UMLALL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlall", 0b010>; +defm UMLALL_MZZ_BtoS : sme2_mla_ll_array_single_32b<"umlall", 0b100>; +defm UMLALL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single_32b<"umlall", 0b100>; +defm UMLALL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single_32b<"umlall", 0b100>; +defm UMLALL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi_32b<"umlall", 0b100>; +defm UMLALL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi_32b<"umlall", 0b100>; + +def SUMLALL_MZZI : sme2_mla_ll_array_index_32b<"sumlall", 0b101>; +defm SUMLALL_VG2_M2ZZI : sme2_mla_ll_array_vg2_index_32b<"sumlall", 0b110>; +defm SUMLALL_VG4_M4ZZI : sme2_mla_ll_array_vg4_index_32b<"sumlall", 0b110>; +defm SUMLALL_VG2_M2ZZ : sme2_mla_ll_array_vg2_single_32b<"sumlall", 0b101>; +defm SUMLALL_VG4_M4ZZ : sme2_mla_ll_array_vg4_single_32b<"sumlall", 0b101>; + +def UMLSLL_MZZI_BtoS : sme2_mla_ll_array_index_32b<"umlsll", 0b110>; +defm UMLSLL_VG2_M2ZZI_BtoS : sme2_mla_ll_array_vg2_index_32b<"umlsll", 0b011>; +defm UMLSLL_VG4_M4ZZI_BtoS : sme2_mla_ll_array_vg4_index_32b<"umlsll", 0b011>; +defm UMLSLL_MZZ_BtoS : sme2_mla_ll_array_single_32b<"umlsll", 0b110>; +defm UMLSLL_VG2_M2ZZ_BtoS : sme2_mla_ll_array_vg2_single_32b<"umlsll", 0b110>; +defm UMLSLL_VG4_M4ZZ_BtoS : sme2_mla_ll_array_vg4_single_32b<"umlsll", 0b110>; +defm UMLSLL_VG2_M2Z2Z_BtoS : sme2_mla_ll_array_vg2_multi_32b<"umlsll", 0b110>; +defm UMLSLL_VG4_M4Z4Z_BtoS : sme2_mla_ll_array_vg4_multi_32b<"umlsll", 0b110>; + def FCVT_Z2Z_StoH : sme2_cvt_vg2_single<"fcvt", 0b00, 0b00>; def FCVTN_Z2Z_StoH : sme2_cvt_vg2_single<"fcvtn", 0b01, 0b00>; @@ -362,37 +413,371 @@ defm SQCVTUN_Z4Z : sme2_int_cvt_vg4_single<"sqcvtun", 0b110>; defm UQCVTN_Z4Z : sme2_int_cvt_vg4_single<"uqcvtn", 0b011>; +def FCVTZS_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzs", 
0b010>; +def FCVTZS_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzs", 0b010>; +def FCVTZU_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"fcvtzu", 0b011>; +def FCVTZU_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"fcvtzu", 0b011>; +def SCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"scvtf", 0b100>; +def SCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"scvtf", 0b100>; +def UCVTF_2Z2Z_StoS : sme2_fp_cvt_vg2_multi<"ucvtf", 0b101>; +def UCVTF_4Z4Z_StoS : sme2_fp_cvt_vg4_multi<"ucvtf", 0b101>; + +defm SMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smax", 0b0000000>; +defm SMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smax", 0b0000000>; +defm SMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smax", 0b000000>; +defm SMAX_VG4_4Z2Z : sme2_int_sve_destructive_vector_vg4_multi<"smax", 0b000000>; + +defm UMAX_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"umax", 0b0000001>; +defm UMAX_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"umax", 0b0000001>; +defm UMAX_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umax", 0b000001>; +defm UMAX_VG4_4Z2Z : sme2_int_sve_destructive_vector_vg4_multi<"umax", 0b000001>; + +defm SMIN_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"smin", 0b0000010>; +defm SMIN_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"smin", 0b0000010>; +defm SMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"smin", 0b000010>; +defm SMIN_VG4_4Z2Z : sme2_int_sve_destructive_vector_vg4_multi<"smin", 0b000010>; + +defm UMIN_VG2_2ZZ : sme2_int_sve_destructive_vector_vg2_single<"umin", 0b0000011>; +defm UMIN_VG4_4ZZ : sme2_int_sve_destructive_vector_vg4_single<"umin", 0b0000011>; +defm UMIN_VG2_2Z2Z : sme2_int_sve_destructive_vector_vg2_multi<"umin", 0b000011>; +defm UMIN_VG4_4Z2Z : sme2_int_sve_destructive_vector_vg4_multi<"umin", 0b000011>; + +defm FMAX_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmax", 0b0010000>; +defm FMAX_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmax", 0b0010000>; +defm FMAX_VG2_2Z2Z : 
sme2_fp_sve_destructive_vector_vg2_multi<"fmax", 0b000000>; +defm FMAX_VG4_4Z2Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmax", 0b000000>; + +defm FMIN_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmin", 0b0010001>; +defm FMIN_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmin", 0b0010001>; +defm FMIN_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmin", 0b000001>; +defm FMIN_VG4_4Z2Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmin", 0b000001>; + +defm FMAXNM_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fmaxnm", 0b0010010>; +defm FMAXNM_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fmaxnm", 0b0010010>; +defm FMAXNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fmaxnm", 0b000010>; +defm FMAXNM_VG4_4Z2Z : sme2_fp_sve_destructive_vector_vg4_multi<"fmaxnm", 0b000010>; + +defm FMINNM_VG2_2ZZ : sme2_fp_sve_destructive_vector_vg2_single<"fminnm", 0b0010011>; +defm FMINNM_VG4_4ZZ : sme2_fp_sve_destructive_vector_vg4_single<"fminnm", 0b0010011>; +defm FMINNM_VG2_2Z2Z : sme2_fp_sve_destructive_vector_vg2_multi<"fminnm", 0b000011>; +defm FMINNM_VG4_4Z2Z : sme2_fp_sve_destructive_vector_vg4_multi<"fminnm", 0b000011>; + +defm SRSHL_2ZZ: sme2_int_sve_destructive_vector_vg2_single<"srshl", 0b0100010>; +defm SRSHL_4ZZ: sme2_int_sve_destructive_vector_vg4_single<"srshl", 0b0100010>; +defm SRSHL_2Z4Z: sme2_int_sve_destructive_vector_vg2_multi<"srshl", 0b010010>; +defm SRSHL_4Z4Z: sme2_int_sve_destructive_vector_vg4_multi<"srshl", 0b010010>; + +defm URSHL_2ZZ: sme2_int_sve_destructive_vector_vg2_single<"urshl", 0b0100011>; +defm URSHL_4ZZ: sme2_int_sve_destructive_vector_vg4_single<"urshl", 0b0100011>; +defm URSHL_2Z4Z: sme2_int_sve_destructive_vector_vg2_multi<"urshl", 0b010011>; +defm URSHL_4Z4Z: sme2_int_sve_destructive_vector_vg4_multi<"urshl", 0b010011>; + +defm FRINTA_2Z2Z: sme2_frint_vector_vg2_multi<"frinta", 0b100>; +defm FRINTA_4Z4Z: sme2_frint_vector_vg4_multi<"frinta", 0b100>; +defm FRINTM_2Z2Z: 
sme2_frint_vector_vg2_multi<"frintm", 0b010>; +defm FRINTM_4Z4Z: sme2_frint_vector_vg4_multi<"frintm", 0b010>; +defm FRINTN_2Z2Z: sme2_frint_vector_vg2_multi<"frintn", 0b000>; +defm FRINTN_4Z4Z: sme2_frint_vector_vg4_multi<"frintn", 0b000>; +defm FRINTP_2Z2Z: sme2_frint_vector_vg2_multi<"frintp", 0b001>; +defm FRINTP_4Z4Z: sme2_frint_vector_vg4_multi<"frintp", 0b001>; + +defm SEL_VG2_2ZP2Z2Z: sme2_sel_vector_vg2<"sel">; +defm SEL_VG4_4ZP4Z4Z: sme2_sel_vector_vg4<"sel">; + +defm FCLAMP_2Z2Z : sme2_fp_clamp_vector_vg2_multi<"fclamp">; +defm FCLAMP_4Z4Z : sme2_fp_clamp_vector_vg4_multi<"fclamp">; + +defm SCLAMP_2Z2Z : sme2_int_clamp_vector_vg2_multi<"sclamp", 0b0>; +defm SCLAMP_4Z4Z : sme2_int_clamp_vector_vg4_multi<"sclamp", 0b0>; + +defm UCLAMP_2Z2Z : sme2_int_clamp_vector_vg2_multi<"uclamp", 0b1>; +defm UCLAMP_4Z4Z : sme2_int_clamp_vector_vg4_multi<"uclamp", 0b1>; + + +defm FMLA_VG2_M2ZZI_S : sme2_fmla_array_vg2_index<"fmla", 0b0000, int_aarch64_sme_fmla_lane_vg1x2>; +defm FMLA_VG4_M4ZZI_S : sme2_fmla_array_vg4_index_S<"fmla", 0b0000, int_aarch64_sme_fmla_lane_vg1x4>; + +defm FMLS_VG2_M2ZZI_S : sme2_fmla_array_vg2_index<"fmls", 0b0010, int_aarch64_sme_fmls_lane_vg1x2>; +defm FMLS_VG4_M4ZZI_S : sme2_fmla_array_vg4_index_S<"fmls", 0b0010, int_aarch64_sme_fmls_lane_vg1x4>; + +defm FDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"fdot", 0b1001>; +defm FDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_HtoS<"fdot", 0b1001>; +defm FDOT_VG2_M2ZZ : sme2_fp_dot_array_vg2_single<"fdot", 0b0>; +defm FDOT_VG4_M4ZZ : sme2_fp_dot_array_vg4_single<"fdot", 0b0>; +defm FDOT_VG2_M2Z2Z : sme2_fp_dot_array_vg2_multi<"fdot", 0b0>; +defm FDOT_VG4_M4Z4Z : sme2_fp_dot_array_vg4_multi<"fdot", 0b0>; + +defm BFDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"bfdot", 0b1011>; +defm BFDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_HtoS<"bfdot", 0b1011>; +defm BFDOT_VG2_M2ZZ : sme2_fp_dot_array_vg2_single<"bfdot", 0b1>; +defm BFDOT_VG4_M4ZZ : sme2_fp_dot_array_vg4_single<"bfdot", 0b1>; +defm BFDOT_VG2_M2Z2Z : 
sme2_fp_dot_array_vg2_multi<"bfdot", 0b1>; +defm BFDOT_VG4_M4Z4Z : sme2_fp_dot_array_vg4_multi<"bfdot", 0b1>; + +defm BFVDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"bfvdot", 0b0011>; +defm FVDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"fvdot", 0b0001>; + +defm SDOT_VG2_M2ZZI_HToS : sme2_dot_array_vg2_index_HToS<"sdot", 0b1000>; +defm SDOT_VG2_M2ZZI_BToS : sme2_dot_array_vg2_index_BToS<"sdot", 0b1100>; +defm SDOT_VG4_M4ZZI_HToS : sme2_dot_array_vg4_index_HtoS<"sdot", 0b1000>; +defm SDOT_VG4_M4ZZI_BToS : sme2_dot_array_vg4_index_BtoS<"sdot", 0b1100>; +defm SDOT_VG2_M2ZZ_HtoS : sme2_int_dot_array_vg2_single_HtoS<"sdot", 0b0>; +defm SDOT_VG4_M4ZZ_HtoS : sme2_int_dot_array_vg4_single_HtoS<"sdot", 0b0>; +defm SDOT_VG2_M2Z2Z_HtoS : sme2_int_dot_array_vg2_multi_HtoS<"sdot", 0b0>; +defm SDOT_VG4_M4Z4Z_HtoS : sme2_int_dot_array_vg4_multi_HtoS<"sdot", 0b0>; +defm SDOT_VG2_M2ZZ_BtoS : sme2_int_dot_array_vg2_single_BtoS<"sdot", 0b0>; +defm SDOT_VG4_M4ZZ_BtoS : sme2_int_dot_array_vg4_single_BtoS<"sdot", 0b0>; +defm SDOT_VG2_M2Z2Z_BtoS : sme2_int_dot_array_vg2_multi_BtoS<"sdot", 0b0>; +defm SDOT_VG4_M4Z4Z_BtoS : sme2_int_dot_array_vg4_multi_BtoS<"sdot", 0b0>; + + +defm SUDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_BToS<"sudot", 0b1111>; +defm SUDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_BtoS<"sudot", 0b1111>; +defm SUDOT_VG2_M2ZZ : sme2_int_dot_array_vg2_single<"sudot", 0b1>; +defm SUDOT_VG4_M4ZZ : sme2_int_dot_array_vg4_single<"sudot", 0b1>; + +defm SVDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"svdot", 0b0100>; +defm SVDOT_VG4_M4ZZI_BtoS : sme2_dot_array_vg4_index_BtoS<"svdot", 0b0100>; +defm SUVDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_BtoS<"suvdot", 0b0111>; + +defm UDOT_VG2_M2ZZI_HToS : sme2_dot_array_vg2_index_HToS<"udot", 0b1010>; +defm UDOT_VG2_M2ZZI_BToS : sme2_dot_array_vg2_index_BToS<"udot", 0b1110>; +defm UDOT_VG4_M4ZZI_BtoS : sme2_dot_array_vg4_index_BtoS<"udot", 0b1110>; +defm UDOT_VG4_M4ZZI_HToS : sme2_dot_array_vg4_index_HtoS<"udot", 0b1010>; +defm 
UDOT_VG2_M2ZZ_HtoS : sme2_int_dot_array_vg2_single_HtoS<"udot", 0b1>; +defm UDOT_VG4_M4ZZ_HtoS : sme2_int_dot_array_vg4_single_HtoS<"udot", 0b1>; +defm UDOT_VG2_M2Z2Z_HtoS : sme2_int_dot_array_vg2_multi_HtoS<"udot", 0b1>; +defm UDOT_VG4_M4Z4Z_HtoS : sme2_int_dot_array_vg4_multi_HtoS<"udot", 0b1>; +defm UDOT_VG2_M2ZZ_BtoS : sme2_int_dot_array_vg2_single_BtoS<"udot", 0b1>; +defm UDOT_VG4_M4ZZ_BtoS : sme2_int_dot_array_vg4_single_BtoS<"udot", 0b1>; +defm UDOT_VG2_M2Z2Z_BtoS : sme2_int_dot_array_vg2_multi_BtoS<"udot", 0b1>; +defm UDOT_VG4_M4Z4Z_BtoS : sme2_int_dot_array_vg4_multi_BtoS<"udot", 0b1>; + +defm USDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_BToS<"usdot", 0b1101>; +defm USDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_BtoS<"usdot", 0b1101>; +defm USDOT_VG2_M2ZZ : sme2_int_dot_array_vg2_single<"usdot", 0b0>; +defm USDOT_VG4_M4ZZ : sme2_int_dot_array_vg4_single<"usdot", 0b0>; +defm USDOT_VG2_M2Z2Z : sme2_int_dot_array_vg2_multi<"usdot">; +defm USDOT_VG4_M4Z4Z : sme2_int_dot_array_vg4_multi<"usdot">; + +defm USVDOT_VG4_M4ZZI : sme2_dot_array_vg4_index_BtoS<"usvdot", 0b0101>; +defm UVDOT_VG2_M2ZZI : sme2_dot_array_vg2_index_HToS<"uvdot", 0b0110>; +defm UVDOT_VG4_M4ZZI_BtoS : sme2_dot_array_vg4_index_BtoS<"uvdot", 0b0110>; + +defm BMOPA_TPPZZ : sme2_bmopx_tile_base<"bmopa", 0b000>; +defm BMOPS_TPPZZ : sme2_bmopx_tile_base<"bmops", 0b001>; + +defm SMOPA_TPPZZ : sme2_int_mopx_tile_base<"smopa", 0b100>; +defm SMOPS_TPPZZ : sme2_int_mopx_tile_base<"smops", 0b101>; + +defm UMOPA_TPPZZ : sme2_int_mopx_tile_base<"umopa", 0b110>; +defm UMOPS_TPPZZ : sme2_int_mopx_tile_base<"umops", 0b111>; + +def ZERO_T : sme2_zero_zt<"zero">; + +def LDR_TX : sme2_spill_fill_vector<"ldr", 0b0>; +def STR_TX : sme2_spill_fill_vector<"str", 0b1>; + +def MOVT_XTI : sme2_movt_zt_to_scalar<"movt">; +def MOVT_TIX : sme2_movt_scalar_to_zt<"movt">; + +defm LUTI2_ZTZI : sme2_luti2_vector_index<"luti2">; +defm LUTI2_2ZTZI : sme2_luti2_vector_vg2_index<"luti2">; +defm LUTI2_4ZTZI : 
sme2_luti2_vector_vg4_index<"luti2">; + +defm LUTI4_ZTZI : sme2_luti4_vector_index<"luti4">; +defm LUTI4_2ZTZI : sme2_luti4_vector_vg2_index<"luti4">; +defm LUTI4_4ZTZI : sme2_luti4_vector_vg4_index<"luti4">; + +defm SUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"sunpk", 0b0>; +defm SUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"sunpk", 0b0>; +defm UUNPK_VG2_2ZZ : sme2_unpk_vector_vg2<"uunpk", 0b1>; +defm UUNPK_VG4_4Z2Z : sme2_unpk_vector_vg4<"uunpk", 0b1>; + +defm ZIP_VG2_2ZZZ : sme2_zip_vector_vg2<"zip", 0b0>; +defm UZP_VG2_2ZZZ : sme2_zip_vector_vg2<"uzp", 0b1>; +defm ZIP_VG4_4Z4Z : sme2_zip_vector_vg4<"zip", 0b0>; +defm UZP_VG4_4Z4Z : sme2_zip_vector_vg4<"uzp", 0b1>; + +defm INSERT_TI2Z : sme2_mova_vec_to_tile_vg2_multi<"mova", int_aarch64_sme_write_hor_vg2, int_aarch64_sme_write_ver_vg2>; +defm INSERT_TI4Z : sme2_mova_vec_to_tile_vg4_multi<"mova", int_aarch64_sme_write_hor_vg4, int_aarch64_sme_write_ver_vg4>; +defm EXTRACT_2ZTI : sme2_mova_tile_to_vec_vg2_multi<"mov">; +defm EXTRACT_4ZTI : sme2_mova_tile_to_vec_vg4_multi<"mov">; + +defm INSERT_VG2_2ZMI : sme2_mova_vec_to_array_vg2_multi<"mova", int_aarch64_sme_write_vg1x2>; +defm INSERT_VG4_4ZMI : sme2_mova_vec_to_array_vg4_multi<"mova", int_aarch64_sme_write_vg1x4>; +defm EXTRACT_VG2_2ZMI : sme2_mova_array_to_vec_vg2_multi<"mova">; +defm EXTRACT_VG4_4ZMI : sme2_mova_array_to_vec_vg4_multi<"mova">; + +defm SQRSHR_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshr", 0b0, 0b0, int_aarch64_sve_sqrshr_vgx2>; +defm SQRSHR_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshr", 0b000, int_aarch64_sve_sqrshr_vgx4>; + +defm UQRSHR_Z2ZI : sme2_sat_shift_vector_vg2<"uqrshr", 0b0, 0b1, int_aarch64_sve_uqrshr_vgx2>; +defm UQRSHR_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshr", 0b001, int_aarch64_sve_uqrshr_vgx4>; + +defm SQRSHRU_Z2ZI : sme2_sat_shift_vector_vg2<"sqrshru", 0b1, 0b0, int_aarch64_sve_sqrshru_vgx2>; +defm SQRSHRU_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshru", 0b010, int_aarch64_sve_sqrshru_vgx4>; + +defm SQRSHRN_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrn", 
0b100, int_aarch64_sve_sqrshrn_vgx4>; +defm UQRSHRN_Z4ZI : sme2_sat_shift_vector_vg4<"uqrshrn", 0b101, int_aarch64_sve_uqrshrn_vgx4>; + +defm SQRSHRUN_Z4ZI : sme2_sat_shift_vector_vg4<"sqrshrun", 0b110, int_aarch64_sve_sqrshrun_vgx4>; + +def LD1B_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "ld1b">; +def LD1B_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "ld1b">; +defm LD1B_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "ld1b">; +defm LD1B_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "ld1b">; +def LD1H_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "ld1h">; +def LD1H_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "ld1h">; +defm LD1H_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "ld1h">; +defm LD1H_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "ld1h">; +def LD1W_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "ld1w">; +def LD1W_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "ld1w">; +defm LD1W_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "ld1w">; +defm LD1W_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "ld1w">; +def LD1D_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "ld1d">; +def LD1D_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "ld1d">; +defm LD1D_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "ld1d">; +defm LD1D_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b0, 
ZZZZ_d_strided, simm4s4, "ld1d">; + +def LDNT1B_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "ldnt1b">; +def LDNT1B_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "ldnt1b">; +defm LDNT1B_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, simm4s2, "ldnt1b">; +defm LDNT1B_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "ldnt1b">; +def LDNT1H_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "ldnt1h">; +def LDNT1H_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "ldnt1h">; +defm LDNT1H_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "ldnt1h">; +defm LDNT1H_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "ldnt1h">; +def LDNT1W_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "ldnt1w">; +def LDNT1W_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "ldnt1w">; +defm LDNT1W_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "ldnt1w">; +defm LDNT1W_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "ldnt1w">; +def LDNT1D_VG2_M2ZPXX : sme2_ld_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "ldnt1d">; +def LDNT1D_VG4_M4ZPXX : sme2_ld_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "ldnt1d">; +defm LDNT1D_VG2_M2ZPXI : sme2_ld_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "ldnt1d">; +defm LDNT1D_VG4_M4ZPXI : sme2_ld_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "ldnt1d">; + +def ST1B_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b0, ZZ_b_strided, GPR64shifted8, "st1b">; +def 
ST1B_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b0, ZZZZ_b_strided, GPR64shifted8, "st1b">; +defm ST1B_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b0, ZZ_b_strided, simm4s2, "st1b">; +defm ST1B_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b0, ZZZZ_b_strided, simm4s4, "st1b">; +def ST1H_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b0, ZZ_h_strided, GPR64shifted16, "st1h">; +def ST1H_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b0, ZZZZ_h_strided, GPR64shifted16, "st1h">; +defm ST1H_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b0, ZZ_h_strided, simm4s2, "st1h">; +defm ST1H_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b0, ZZZZ_h_strided, simm4s4, "st1h">; +def ST1W_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b0, ZZ_s_strided, GPR64shifted32, "st1w">; +def ST1W_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b0, ZZZZ_s_strided, GPR64shifted32, "st1w">; +defm ST1W_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b0, ZZ_s_strided, simm4s2, "st1w">; +defm ST1W_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b0, ZZZZ_s_strided, simm4s4, "st1w">; +def ST1D_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b0, ZZ_d_strided, GPR64shifted64, "st1d">; +def ST1D_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b0, ZZZZ_d_strided, GPR64shifted64, "st1d">; +defm ST1D_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b0, ZZ_d_strided, simm4s2, "st1d">; +defm ST1D_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b0, ZZZZ_d_strided, simm4s4, "st1d">; + +def STNT1B_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b00, 0b1, ZZ_b_strided, GPR64shifted8, "stnt1b">; +def STNT1B_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b00, 0b1, ZZZZ_b_strided, GPR64shifted8, "stnt1b">; +defm STNT1B_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b00, 0b1, ZZ_b_strided, 
simm4s2, "stnt1b">; +defm STNT1B_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b00, 0b1, ZZZZ_b_strided, simm4s4, "stnt1b">; +def STNT1H_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b01, 0b1, ZZ_h_strided, GPR64shifted16, "stnt1h">; +def STNT1H_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b01, 0b1, ZZZZ_h_strided, GPR64shifted16, "stnt1h">; +defm STNT1H_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b01, 0b1, ZZ_h_strided, simm4s2, "stnt1h">; +defm STNT1H_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b01, 0b1, ZZZZ_h_strided, simm4s4, "stnt1h">; +def STNT1W_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b10, 0b1, ZZ_s_strided, GPR64shifted32, "stnt1w">; +def STNT1W_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b10, 0b1, ZZZZ_s_strided, GPR64shifted32, "stnt1w">; +defm STNT1W_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b10, 0b1, ZZ_s_strided, simm4s2, "stnt1w">; +defm STNT1W_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b10, 0b1, ZZZZ_s_strided, simm4s4, "stnt1w">; +def STNT1D_VG2_M2ZPXX : sme2_st_vector_vg2_multi_scalar_scalar<0b11, 0b1, ZZ_d_strided, GPR64shifted64, "stnt1d">; +def STNT1D_VG4_M4ZPXX : sme2_st_vector_vg4_multi_scalar_scalar<0b11, 0b1, ZZZZ_d_strided, GPR64shifted64, "stnt1d">; +defm STNT1D_VG2_M2ZPXI : sme2_st_vector_vg2_multi_scalar_immediate<0b11, 0b1, ZZ_d_strided, simm4s2, "stnt1d">; +defm STNT1D_VG4_M4ZPXI : sme2_st_vector_vg4_multi_scalar_immediate<0b11, 0b1, ZZZZ_d_strided, simm4s4, "stnt1d">; } let Predicates = [HasSME2, HasSMEI16I64] in { -defm ADD_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"add", 0b10>; -defm ADD_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"add", 0b10>; -defm ADD_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"add", 0b10>; -defm ADD_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"add", 0b10>; - -defm SUB_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"sub", 0b11>; -defm SUB_VG4_M4ZZ_D : 
sme2_mla_add_sub_array_vg4_single_D<"sub", 0b11>; -defm SUB_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"sub", 0b11>; -defm SUB_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"sub", 0b11>; +defm ADD_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"add", 0b10, null_frag>; +defm ADD_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"add", 0b10, null_frag>; +defm ADD_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"add", 0b10, null_frag>; +defm ADD_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"add", 0b10, null_frag>; + +defm SUB_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"sub", 0b11, null_frag>; +defm SUB_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"sub", 0b11, null_frag>; +defm SUB_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"sub", 0b11, null_frag>; +defm SUB_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"sub", 0b11, null_frag>; + +def SMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlall", 0b00>; +defm SMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlall", 0b00>; +defm SMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlall", 0b00>; +defm SMLALL_MZZ_HtoD : sme2_mla_ll_array_single_64b<"smlall", 0b000>; +defm SMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single_64b<"smlall", 0b000>; +defm SMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single_64b<"smlall", 0b000>; +defm SMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi_64b<"smlall", 0b000>; +defm SMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi_64b<"smlall", 0b000>; + +def SMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"smlsll", 0b01>; +defm SMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"smlsll", 0b01>; +defm SMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"smlsll", 0b01>; +defm SMLSLL_MZZ_HtoD : sme2_mla_ll_array_single_64b<"smlsll", 0b010>; +defm SMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single_64b<"smlsll", 0b010>; +defm SMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single_64b<"smlsll", 0b010>; +defm SMLSLL_VG2_M2Z2Z_HtoD : 
sme2_mla_ll_array_vg2_multi_64b<"smlsll", 0b010>; +defm SMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi_64b<"smlsll", 0b010>; + +def UMLALL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlall", 0b10>; +defm UMLALL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlall", 0b10>; +defm UMLALL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlall", 0b10>; +defm UMLALL_MZZ_HtoD : sme2_mla_ll_array_single_64b<"umlall", 0b100>; +defm UMLALL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single_64b<"umlall", 0b100>; +defm UMLALL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single_64b<"umlall", 0b100>; +defm UMLALL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi_64b<"umlall", 0b100>; +defm UMLALL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi_64b<"umlall", 0b100>; + +def UMLSLL_MZZI_HtoD : sme2_mla_ll_array_index_64b<"umlsll", 0b11>; +defm UMLSLL_VG2_M2ZZI_HtoD : sme2_mla_ll_array_vg2_index_64b<"umlsll", 0b11>; +defm UMLSLL_VG4_M4ZZI_HtoD : sme2_mla_ll_array_vg4_index_64b<"umlsll", 0b11>; +defm UMLSLL_MZZ_HtoD : sme2_mla_ll_array_single_64b<"umlsll", 0b110>; +defm UMLSLL_VG2_M2ZZ_HtoD : sme2_mla_ll_array_vg2_single_64b<"umlsll", 0b110>; +defm UMLSLL_VG4_M4ZZ_HtoD : sme2_mla_ll_array_vg4_single_64b<"umlsll", 0b110>; +defm UMLSLL_VG2_M2Z2Z_HtoD : sme2_mla_ll_array_vg2_multi_64b<"umlsll", 0b110>; +defm UMLSLL_VG4_M4Z4Z_HtoD : sme2_mla_ll_array_vg4_multi_64b<"umlsll", 0b110>; defm ADDA_VG2_M2Z2Z_D : sme2_multivec_accum_add_sub_vg2_D<"add", 0b10>; defm ADDA_VG4_M4Z4Z_D : sme2_multivec_accum_add_sub_vg4_D<"add", 0b10>; defm SUBA_VG2_M2Z2Z_D : sme2_multivec_accum_add_sub_vg2_D<"sub", 0b11>; defm SUBA_VG4_M4Z4Z_D : sme2_multivec_accum_add_sub_vg4_D<"sub", 0b11>; + +defm SDOT_VG2_M2ZZI_HtoD : sme2_dot_array_vg2_index<"sdot", 0b01>; +defm SDOT_VG4_M4ZZI_HtoD : sme2_dot_array_vg4_index<"sdot", 0b001>; +defm SDOT_VG2_M2ZZ_HtoD : sme2_int_dot_array_vg2_single_HtoD<"sdot", 0b0>; +defm SDOT_VG4_M4ZZ_HtoD : sme2_int_dot_array_vg4_single_HtoD<"sdot", 0b0>; +defm SDOT_VG2_M2Z2Z_HtoD : 
sme2_int_dot_array_vg2_multi_HtoD<"sdot", 0b0>; +defm SDOT_VG4_M4Z4Z_HtoD : sme2_int_dot_array_vg4_multi_HtoD<"sdot", 0b0>; + +defm SVDOT_VG4_M4ZZI_HtoD : sme2_dot_array_vg4_index<"svdot", 0b101>; +defm UDOT_VG2_M2ZZI_HtoD : sme2_dot_array_vg2_index<"udot", 0b11>; +defm UDOT_VG4_M4ZZI_HtoD : sme2_dot_array_vg4_index<"udot", 0b011>; +defm UDOT_VG2_M2ZZ_HtoD : sme2_int_dot_array_vg2_single_HtoD<"udot", 0b1>; +defm UDOT_VG4_M4ZZ_HtoD : sme2_int_dot_array_vg4_single_HtoD<"udot", 0b1>; +defm UDOT_VG2_M2Z2Z_HtoD : sme2_int_dot_array_vg2_multi_HtoD<"udot", 0b1>; +defm UDOT_VG4_M4Z4Z_HtoD : sme2_int_dot_array_vg4_multi_HtoD<"udot", 0b1>; + +defm UVDOT_VG4_M4ZZI_HtoD : sme2_dot_array_vg4_index<"uvdot", 0b111>; } let Predicates = [HasSME2, HasSMEF64F64] in { -defm FMLA_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmla", 0b00>; -defm FMLA_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmla", 0b00>; -defm FMLA_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"fmla", 0b00>; -defm FMLA_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"fmla", 0b00>; - -defm FMLS_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmls", 0b01>; -defm FMLS_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmls", 0b01>; -defm FMLS_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"fmls", 0b01>; -defm FMLS_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"fmls", 0b01>; +defm FMLA_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmla", 0b00, int_aarch64_sme_fmla_single_vg1x2>; +defm FMLA_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmla", 0b00, int_aarch64_sme_fmla_single_vg1x4>; +defm FMLA_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"fmla", 0b00, int_aarch64_sme_fmla_multi_vg1x2>; +defm FMLA_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"fmla", 0b00, int_aarch64_sme_fmla_multi_vg1x4>; +defm FMLA_VG2_M2ZZI_D : sme2_fmla_array_vg2_index_D<"fmla", 0b00, int_aarch64_sme_fmla_lane_vg1x2>; +defm FMLA_VG4_M4ZZI_D : sme2_fmla_array_vg4_index_D<"fmla", 0b000, 
int_aarch64_sme_fmla_lane_vg1x4>; + +defm FMLS_VG2_M2ZZ_D : sme2_mla_add_sub_array_vg2_single_D<"fmls", 0b01, int_aarch64_sme_fmls_single_vg1x2>; +defm FMLS_VG4_M4ZZ_D : sme2_mla_add_sub_array_vg4_single_D<"fmls", 0b01, int_aarch64_sme_fmls_single_vg1x4>; +defm FMLS_VG2_M2Z2Z_D : sme2_mla_add_sub_array_vg2_multi_D<"fmls", 0b01, int_aarch64_sme_fmls_multi_vg1x2>; +defm FMLS_VG4_M4Z4Z_D : sme2_mla_add_sub_array_vg4_multi_D<"fmls", 0b01, int_aarch64_sme_fmls_multi_vg1x4>; +defm FMLS_VG2_M2ZZI_D : sme2_fmla_array_vg2_index_D<"fmls", 0b10, int_aarch64_sme_fmls_lane_vg1x2>; +defm FMLS_VG4_M4ZZI_D : sme2_fmla_array_vg4_index_D<"fmls", 0b010, int_aarch64_sme_fmls_lane_vg1x4>; defm FADD_VG2_M2Z2Z_D : sme2_multivec_accum_add_sub_vg2_D<"fadd", 0b00>; defm FADD_VG4_M4Z4Z_D : sme2_multivec_accum_add_sub_vg4_D<"fadd", 0b00>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -2445,6 +2445,10 @@ def : Pat<(nxv2f64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2f64 ZPR:$src)>; } + // These allow casting from/to the opaque aarch64svcount type. + def : Pat<(aarch64svcount (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + def : Pat<(nxv16i1 (reinterpret_cast (aarch64svcount PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; + // These allow casting from/to unpacked predicate types. 
def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; def : Pat<(nxv16i1 (reinterpret_cast (nxv8i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>; @@ -2703,6 +2707,7 @@ } defm Pat_Store_P16 : unpred_store_predicate; + defm Pat_Store_PredAsCount : unpred_store_predicate; multiclass unpred_load_predicate { def _fi : Pat<(Ty (load (am_sve_fi GPR64sp:$base, simm9:$offset))), @@ -2713,6 +2718,7 @@ } defm Pat_Load_P16 : unpred_load_predicate; + defm Pat_Load_PredAsCount : unpred_load_predicate; multiclass ld1 { @@ -3577,4 +3583,132 @@ defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp">; def FDOT_ZZZ_S : sve_float_dot<0b0, "fdot">; def FDOT_ZZZI_S : sve_float_dot_indexed<0b0, "fdot">; + +defm PEXT_PCI : sve2p1_int_ctr_to_mask<"pext", int_aarch64_sve_pext>; +defm PTRUE_C : sve2p1_ptrue_pn<"ptrue">; + +// Load to two consecutive registers +def LD1B_2ZCXX : sve2p1_mem_cld_ss_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>; +def LD1H_2ZCXX : sve2p1_mem_cld_ss_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16>; +def LD1W_2ZCXX : sve2p1_mem_cld_ss_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32>; +def LD1D_2ZCXX : sve2p1_mem_cld_ss_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64>; +defm LD1B_2ZCXI : sve2p1_mem_cld_si_2z<"ld1b", 0b00, 0b0, ZZ_b_mul_r>; +defm LD1H_2ZCXI : sve2p1_mem_cld_si_2z<"ld1h", 0b01, 0b0, ZZ_h_mul_r>; +defm LD1W_2ZCXI : sve2p1_mem_cld_si_2z<"ld1w", 0b10, 0b0, ZZ_s_mul_r>; +defm LD1D_2ZCXI : sve2p1_mem_cld_si_2z<"ld1d", 0b11, 0b0, ZZ_d_mul_r>; +def LDNT1B_2ZCXX : sve2p1_mem_cld_ss_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8>; +def LDNT1H_2ZCXX : sve2p1_mem_cld_ss_2z<"ldnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16>; +def LDNT1W_2ZCXX : sve2p1_mem_cld_ss_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32>; +def LDNT1D_2ZCXX : sve2p1_mem_cld_ss_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64>; +defm LDNT1B_2ZCXI : sve2p1_mem_cld_si_2z<"ldnt1b", 0b00, 0b1, ZZ_b_mul_r>; +defm LDNT1H_2ZCXI : sve2p1_mem_cld_si_2z<"ldnt1h", 
0b01, 0b1, ZZ_h_mul_r>; +defm LDNT1W_2ZCXI : sve2p1_mem_cld_si_2z<"ldnt1w", 0b10, 0b1, ZZ_s_mul_r>; +defm LDNT1D_2ZCXI : sve2p1_mem_cld_si_2z<"ldnt1d", 0b11, 0b1, ZZ_d_mul_r>; + +// Load to four consecutive registers +def LD1B_4ZCXX : sve2p1_mem_cld_ss_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8>; +def LD1H_4ZCXX : sve2p1_mem_cld_ss_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16>; +def LD1W_4ZCXX : sve2p1_mem_cld_ss_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32>; +def LD1D_4ZCXX : sve2p1_mem_cld_ss_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64>; +defm LD1B_4ZCXI : sve2p1_mem_cld_si_4z<"ld1b", 0b00, 0b0, ZZZZ_b_mul_r>; +defm LD1H_4ZCXI : sve2p1_mem_cld_si_4z<"ld1h", 0b01, 0b0, ZZZZ_h_mul_r>; +defm LD1W_4ZCXI : sve2p1_mem_cld_si_4z<"ld1w", 0b10, 0b0, ZZZZ_s_mul_r>; +defm LD1D_4ZCXI : sve2p1_mem_cld_si_4z<"ld1d", 0b11, 0b0, ZZZZ_d_mul_r>; +def LDNT1B_4ZCXX : sve2p1_mem_cld_ss_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8>; +def LDNT1H_4ZCXX : sve2p1_mem_cld_ss_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16>; +def LDNT1W_4ZCXX : sve2p1_mem_cld_ss_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32>; +def LDNT1D_4ZCXX : sve2p1_mem_cld_ss_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64>; +defm LDNT1B_4ZCXI : sve2p1_mem_cld_si_4z<"ldnt1b", 0b00, 0b1, ZZZZ_b_mul_r>; +defm LDNT1H_4ZCXI : sve2p1_mem_cld_si_4z<"ldnt1h", 0b01, 0b1, ZZZZ_h_mul_r>; +defm LDNT1W_4ZCXI : sve2p1_mem_cld_si_4z<"ldnt1w", 0b10, 0b1, ZZZZ_s_mul_r>; +defm LDNT1D_4ZCXI : sve2p1_mem_cld_si_4z<"ldnt1d", 0b11, 0b1, ZZZZ_d_mul_r>; + +// Stores of two consecutive registers +def ST1B_2ZCXX : sve2p1_mem_cst_ss_2z<"st1b", 0b00, 0b0, ZZ_b_mul_r, GPR64shifted8>; +def ST1H_2ZCXX : sve2p1_mem_cst_ss_2z<"st1h", 0b01, 0b0, ZZ_h_mul_r, GPR64shifted16>; +def ST1W_2ZCXX : sve2p1_mem_cst_ss_2z<"st1w", 0b10, 0b0, ZZ_s_mul_r, GPR64shifted32>; +def ST1D_2ZCXX : sve2p1_mem_cst_ss_2z<"st1d", 0b11, 0b0, ZZ_d_mul_r, GPR64shifted64>; +defm ST1B_2ZCXI : sve2p1_mem_cst_si_2z<"st1b", 0b00, 0b0, 
ZZ_b_mul_r>; +defm ST1H_2ZCXI : sve2p1_mem_cst_si_2z<"st1h", 0b01, 0b0, ZZ_h_mul_r>; +defm ST1W_2ZCXI : sve2p1_mem_cst_si_2z<"st1w", 0b10, 0b0, ZZ_s_mul_r>; +defm ST1D_2ZCXI : sve2p1_mem_cst_si_2z<"st1d", 0b11, 0b0, ZZ_d_mul_r>; +def STNT1B_2ZCXX : sve2p1_mem_cst_ss_2z<"stnt1b", 0b00, 0b1, ZZ_b_mul_r, GPR64shifted8>; +def STNT1H_2ZCXX : sve2p1_mem_cst_ss_2z<"stnt1h", 0b01, 0b1, ZZ_h_mul_r, GPR64shifted16>; +def STNT1W_2ZCXX : sve2p1_mem_cst_ss_2z<"stnt1w", 0b10, 0b1, ZZ_s_mul_r, GPR64shifted32>; +def STNT1D_2ZCXX : sve2p1_mem_cst_ss_2z<"stnt1d", 0b11, 0b1, ZZ_d_mul_r, GPR64shifted64>; +defm STNT1B_2ZCXI : sve2p1_mem_cst_si_2z<"stnt1b", 0b00, 0b1, ZZ_b_mul_r>; +defm STNT1H_2ZCXI : sve2p1_mem_cst_si_2z<"stnt1h", 0b01, 0b1, ZZ_h_mul_r>; +defm STNT1W_2ZCXI : sve2p1_mem_cst_si_2z<"stnt1w", 0b10, 0b1, ZZ_s_mul_r>; +defm STNT1D_2ZCXI : sve2p1_mem_cst_si_2z<"stnt1d", 0b11, 0b1, ZZ_d_mul_r>; + +// Stores of four consecutive registers +def ST1B_4ZCXX : sve2p1_mem_cst_ss_4z<"st1b", 0b00, 0b0, ZZZZ_b_mul_r, GPR64shifted8>; +def ST1H_4ZCXX : sve2p1_mem_cst_ss_4z<"st1h", 0b01, 0b0, ZZZZ_h_mul_r, GPR64shifted16>; +def ST1W_4ZCXX : sve2p1_mem_cst_ss_4z<"st1w", 0b10, 0b0, ZZZZ_s_mul_r, GPR64shifted32>; +def ST1D_4ZCXX : sve2p1_mem_cst_ss_4z<"st1d", 0b11, 0b0, ZZZZ_d_mul_r, GPR64shifted64>; +defm ST1B_4ZCXI : sve2p1_mem_cst_si_4z<"st1b", 0b00, 0b0, ZZZZ_b_mul_r>; +defm ST1H_4ZCXI : sve2p1_mem_cst_si_4z<"st1h", 0b01, 0b0, ZZZZ_h_mul_r>; +defm ST1W_4ZCXI : sve2p1_mem_cst_si_4z<"st1w", 0b10, 0b0, ZZZZ_s_mul_r>; +defm ST1D_4ZCXI : sve2p1_mem_cst_si_4z<"st1d", 0b11, 0b0, ZZZZ_d_mul_r>; +def STNT1B_4ZCXX : sve2p1_mem_cst_ss_4z<"stnt1b", 0b00, 0b1, ZZZZ_b_mul_r, GPR64shifted8>; +def STNT1H_4ZCXX : sve2p1_mem_cst_ss_4z<"stnt1h", 0b01, 0b1, ZZZZ_h_mul_r, GPR64shifted16>; +def STNT1W_4ZCXX : sve2p1_mem_cst_ss_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r, GPR64shifted32>; +def STNT1D_4ZCXX : sve2p1_mem_cst_ss_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r, GPR64shifted64>; +defm STNT1B_4ZCXI : 
sve2p1_mem_cst_si_4z<"stnt1b", 0b00, 0b1, ZZZZ_b_mul_r>; +defm STNT1H_4ZCXI : sve2p1_mem_cst_si_4z<"stnt1h", 0b01, 0b1, ZZZZ_h_mul_r>; +defm STNT1W_4ZCXI : sve2p1_mem_cst_si_4z<"stnt1w", 0b10, 0b1, ZZZZ_s_mul_r>; +defm STNT1D_4ZCXI : sve2p1_mem_cst_si_4z<"stnt1d", 0b11, 0b1, ZZZZ_d_mul_r>; + + multiclass store_pn_vg2 { + def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), + (aarch64svcount PPR:$PNg), GPR64:$base), + (RegImmInst (REG_SEQUENCE ZPR2, Ty:$vec0, zsub0, Ty:$vec1, zsub1), + PPR:$PNg, GPR64:$base, (i64 0))>; + } + + // Stores of 2 consecutive vectors + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + defm : store_pn_vg2; + + multiclass store_pn_vg4 { + def : Pat<(Store (Ty ZPR:$vec0), (Ty ZPR:$vec1), (Ty ZPR:$vec2), (Ty ZPR:$vec3), + (aarch64svcount PPR:$PNg), GPR64:$base), + (RegImmInst (REG_SEQUENCE ZPR4, Ty:$vec0, zsub0, Ty:$vec1, zsub1, + Ty:$vec2, zsub2, Ty:$vec3, zsub3), + PPR:$PNg, GPR64:$base, (i64 0))>; + } + + // Stores of 4 consecutive vectors + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; + defm : store_pn_vg4; } // End HasSVE2p1_or_HasSME2 diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -158,6 +158,11 @@ cl::desc("Enable SVE intrinsic opts"), cl::init(true)); +cl::opt + 
EnableSMEPeepholeOpt("enable-aarch64-sme-peephole-opt", cl::init(true), + cl::Hidden, + cl::desc("Perform SME peephole optimization")); + static cl::opt EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix", cl::init(true), cl::Hidden); @@ -212,6 +217,7 @@ initializeAArch64KCFIPass(*PR); initializeAArch64LoadStoreOptPass(*PR); initializeAArch64MIPeepholeOptPass(*PR); + initializeSMEPeepholeOptPass(*PR); initializeAArch64SIMDInstrOptPass(*PR); initializeAArch64O0PreLegalizerCombinerPass(*PR); initializeAArch64PreLegalizerCombinerPass(*PR); @@ -706,6 +712,9 @@ } void AArch64PassConfig::addMachineSSAOptimization() { + if (TM->getOptLevel() != CodeGenOpt::None && EnableSMEPeepholeOpt) + addPass(createSMEPeepholeOptPass()); + // Run default MachineSSAOptimization first. TargetPassConfig::addMachineSSAOptimization(); diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -9,6 +9,7 @@ #include "AArch64TargetTransformInfo.h" #include "AArch64ExpandImm.h" #include "AArch64PerfectShuffle.h" +#include "Utils/AArch64BaseInfo.h" #include "MCTargetDesc/AArch64AddressingModes.h" #include "llvm/Analysis/IVDescriptors.h" #include "llvm/Analysis/LoopInfo.h" diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -68,6 +68,7 @@ Scalar, NeonVector, SVEDataVector, + SMEPredicateAsCounter, SVEPredicateVector, Matrix }; @@ -224,6 +225,7 @@ bool validateInstruction(MCInst &Inst, SMLoc &IDLoc, SmallVectorImpl &Loc); + unsigned getNumRegsForRegKind(RegKind K); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, OperandVector &Operands, MCStreamer &Out, uint64_t &ErrorInfo, @@ -266,6 +268,7 @@ template 
OperandMatchResultTy tryParseSVEDataVector(OperandVector &Operands); OperandMatchResultTy tryParseSVEPredicateVector(OperandVector &Operands); + OperandMatchResultTy tryParseSMEPredicateAsCounter(OperandVector &Operands); template <RegKind VectorKind> OperandMatchResultTy tryParseVectorList(OperandVector &Operands, bool ExpectMatch = false); @@ -401,6 +404,7 @@ struct VectorListOp { unsigned RegNum; unsigned Count; + unsigned Stride; unsigned NumElements; unsigned ElementWidth; RegKind RegisterKind; @@ -680,6 +684,11 @@ return VectorList.Count; } + unsigned getVectorListStride() const { + assert(Kind == k_VectorList && "Invalid access!"); + return VectorList.Stride; + } + int getVectorIndex() const { assert(Kind == k_VectorIndex && "Invalid access!"); return VectorIndex.Val; @@ -1198,6 +1207,22 @@ bool isMatrix() const { return Kind == k_MatrixRegister; } bool isMatrixTileList() const { return Kind == k_MatrixTileList; } + template <unsigned Class> bool isSMEVectorReg() const { + RegKind RK; + switch (Class) { + case AArch64::PPRRegClassID: + case AArch64::PPR_3bRegClassID: + case AArch64::PPR_3b_p8_p15RegClassID: + RK = RegKind::SMEPredicateAsCounter; + break; + default: + llvm_unreachable("Unsupport register class"); + } + + return (Kind == k_Register && Reg.Kind == RK) && + AArch64MCRegisterClasses[Class].contains(getReg()); + } + template <unsigned Class> bool isSVEVectorReg() const { RegKind RK; switch (Class) { @@ -1234,6 +1259,17 @@ return DiagnosticPredicateTy::NearMatch; } + template <int ElementWidth, unsigned Class> + DiagnosticPredicate isSMEPredicateAsCounterRegOfWidth() const { + if (Kind != k_Register || Reg.Kind != RegKind::SMEPredicateAsCounter) + return DiagnosticPredicateTy::NoMatch; + + if (isSMEVectorReg<Class>() && (Reg.ElementWidth == ElementWidth)) + return DiagnosticPredicateTy::Match; + + return DiagnosticPredicateTy::NearMatch; + } + template DiagnosticPredicate isSVEDataVectorRegOfWidth() const { if (Kind != k_Register || Reg.Kind != RegKind::SVEDataVector) @@ -1335,7 +1371,7 @@ } template + unsigned ElementWidth, unsigned Stride = 
1> bool isTypedVectorList() const { if (Kind != k_VectorList) return false; @@ -1345,6 +1381,8 @@ return false; if (VectorList.ElementWidth != ElementWidth) return false; + if (VectorList.Stride != Stride) + return false; return VectorList.NumElements == NumElements; } @@ -1360,6 +1398,28 @@ return DiagnosticPredicateTy::Match; } + template <RegKind VectorKind, unsigned NumRegs, unsigned Stride, + unsigned ElementWidth> + DiagnosticPredicate isTypedVectorListStrided() const { + if (Kind != k_VectorList) + return DiagnosticPredicateTy::NoMatch; + if (VectorList.Count != NumRegs) + return DiagnosticPredicateTy::NoMatch; + if (VectorList.RegisterKind != VectorKind) + return DiagnosticPredicateTy::NoMatch; + if (VectorList.NumElements != 0) + return DiagnosticPredicateTy::NoMatch; + if (VectorList.ElementWidth != ElementWidth) + return DiagnosticPredicateTy::NearMatch; + if (VectorList.Stride != Stride) + return DiagnosticPredicateTy::NearMatch; + if ((VectorList.RegNum < (AArch64::Z0 + Stride)) || + ((VectorList.RegNum >= AArch64::Z16) && + (VectorList.RegNum < (AArch64::Z16 + Stride)))) + return DiagnosticPredicateTy::Match; + return DiagnosticPredicateTy::NoMatch; + } + template DiagnosticPredicate isVectorIndex() const { if (Kind != k_VectorIndex) @@ -1709,6 +1769,33 @@ FirstRegs[(unsigned)RegTy][0])); } + template <unsigned NumRegs> + void addStridedVectorListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + assert((NumRegs == 2 || NumRegs == 4) && " NumRegs must be 2 or 4"); + + switch (NumRegs) { + case 2: + if ((getVectorListStart() - AArch64::Z0) < 16) + Inst.addOperand(MCOperand::createReg( + AArch64::Z0_Z8 + getVectorListStart() - AArch64::Z0)); + else + Inst.addOperand(MCOperand::createReg( + AArch64::Z16_Z24 + getVectorListStart() - AArch64::Z16)); + break; + case 4: + if ((getVectorListStart() - AArch64::Z0) < 16) + Inst.addOperand(MCOperand::createReg( + AArch64::Z0_Z4_Z8_Z12 + getVectorListStart() - AArch64::Z0)); + else + Inst.addOperand(MCOperand::createReg( + AArch64::Z16_Z20_Z24_Z28 + 
getVectorListStart() - AArch64::Z16)); + break; + default: + llvm_unreachable("Unsupported number of registers for strided vec list"); + } + } + void addMatrixTileListOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); unsigned RegMask = getMatrixTileListRegMask(); @@ -2059,7 +2146,8 @@ unsigned ShiftAmount = 0, unsigned HasExplicitAmount = false) { assert((Kind == RegKind::NeonVector || Kind == RegKind::SVEDataVector || - Kind == RegKind::SVEPredicateVector) && + Kind == RegKind::SVEPredicateVector || + Kind == RegKind::SMEPredicateAsCounter) && "Invalid vector kind"); auto Op = CreateReg(RegNum, Kind, S, E, Ctx, EqualsReg, ExtTy, ShiftAmount, HasExplicitAmount); @@ -2068,12 +2156,13 @@ } static std::unique_ptr<AArch64Operand> - CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, - unsigned ElementWidth, RegKind RegisterKind, SMLoc S, SMLoc E, - MCContext &Ctx) { + CreateVectorList(unsigned RegNum, unsigned Count, unsigned Stride, + unsigned NumElements, unsigned ElementWidth, + RegKind RegisterKind, SMLoc S, SMLoc E, MCContext &Ctx) { auto Op = std::make_unique<AArch64Operand>(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; + Op->VectorList.Stride = Stride; Op->VectorList.NumElements = NumElements; Op->VectorList.ElementWidth = ElementWidth; Op->VectorList.RegisterKind = RegisterKind; @@ -2337,7 +2426,7 @@ OS << ""; break; } @@ -2478,6 +2567,7 @@ .Case(".d", {0, 64}) .Default({-1, -1}); break; + case RegKind::SMEPredicateAsCounter: case RegKind::SVEPredicateVector: case RegKind::SVEDataVector: case RegKind::Matrix: @@ -2562,6 +2652,27 @@ .Default(0); } +static unsigned matchSMEPredicateAsCounterRegName(StringRef Name) { + return StringSwitch<unsigned>(Name.lower()) + .Case("pn0", AArch64::P0) + .Case("pn1", AArch64::P1) + .Case("pn2", AArch64::P2) + .Case("pn3", AArch64::P3) + .Case("pn4", AArch64::P4) + .Case("pn5", AArch64::P5) + .Case("pn6", AArch64::P6) + .Case("pn7", AArch64::P7) + .Case("pn8", AArch64::P8) 
+ .Case("pn9", AArch64::P9) + .Case("pn10", AArch64::P10) + .Case("pn11", AArch64::P11) + .Case("pn12", AArch64::P12) + .Case("pn13", AArch64::P13) + .Case("pn14", AArch64::P14) + .Case("pn15", AArch64::P15) + .Default(0); +} + static unsigned matchMatrixTileListRegName(StringRef Name) { return StringSwitch(Name.lower()) .Case("za0.d", AArch64::ZAD0) @@ -2705,6 +2816,9 @@ if ((RegNum = matchSVEPredicateVectorRegName(Name))) return Kind == RegKind::SVEPredicateVector ? RegNum : 0; + if ((RegNum = matchSMEPredicateAsCounterRegName(Name))) + return Kind == RegKind::SMEPredicateAsCounter ? RegNum : 0; + if ((RegNum = MatchNeonVectorRegName(Name))) return Kind == RegKind::NeonVector ? RegNum : 0; @@ -2739,6 +2853,21 @@ return RegNum; } +unsigned AArch64AsmParser::getNumRegsForRegKind(RegKind K) { + switch (K) { + case RegKind::Scalar: + case RegKind::NeonVector: + case RegKind::SVEDataVector: + return 32; + case RegKind::Matrix: + case RegKind::SVEPredicateVector: + case RegKind::SMEPredicateAsCounter: + return 16; + default: + llvm_unreachable("Unsupported RegKind"); + } +} + /// tryParseScalarRegister - Try to parse a register name. The token must be an /// Identifier when called, and if it is a register name the token is eaten and /// the register is added to the operand list. @@ -3071,6 +3200,14 @@ // Eat ',' Lex(); + StringRef VecGroup; + if (!parseOptionalVGOperand(Operands, VecGroup)) { + Operands.push_back( + AArch64Operand::CreateImm(Imm, S, getLoc(), getContext())); + Operands.push_back( + AArch64Operand::CreateToken(VecGroup, getLoc(), getContext())); + return MatchOperand_Success; + } // The optional operand must be "lsl #N" where N is non-negative. 
if (!getTok().is(AsmToken::Identifier) || @@ -3803,6 +3940,61 @@ return MatchOperand_NoMatch; } +OperandMatchResultTy +AArch64AsmParser::tryParseSMEPredicateAsCounter(OperandVector &Operands) { + const SMLoc S = getLoc(); + StringRef Kind; + unsigned RegNum; + auto Res = + tryParseVectorRegister(RegNum, Kind, RegKind::SMEPredicateAsCounter); + if (Res != MatchOperand_Success) + return Res; + + const auto &KindRes = parseVectorKind(Kind, RegKind::SMEPredicateAsCounter); + if (!KindRes) + return MatchOperand_NoMatch; + + unsigned ElementWidth = KindRes->second; + Operands.push_back( + AArch64Operand::CreateVectorReg(RegNum, RegKind::SMEPredicateAsCounter, + ElementWidth, S, getLoc(), getContext())); + + // Check if register is followed by an index + OperandMatchResultTy ResIndex = tryParseVectorIndex(Operands); + if (ResIndex == MatchOperand_Success) + return MatchOperand_Success; + + // Not all predicates are followed by a '/z'. + MCAsmParser &Parser = getParser(); + if (Parser.getTok().isNot(AsmToken::Slash)) + return MatchOperand_Success; + + // But when they do they shouldn't have an element type suffix. + if (!Kind.empty()) { + Error(S, "not expecting size suffix"); + return MatchOperand_ParseFail; + } + + // Add a literal slash as operand + Operands.push_back(AArch64Operand::CreateToken("/", getLoc(), getContext())); + + Parser.Lex(); // Eat the slash. + + // Zeroing or merging? + auto Pred = Parser.getTok().getString().lower(); + if (Pred != "z") { + Error(getLoc(), "expecting 'z' predication"); + return MatchOperand_ParseFail; + } + + // Add zero token. + const char *ZM = "z"; + Operands.push_back(AArch64Operand::CreateToken(ZM, getLoc(), getContext())); + + Parser.Lex(); // Eat zero token. + return MatchOperand_Success; +} + /// tryParseSVEPredicateVector - Parse a SVE predicate register operand. 
OperandMatchResultTy AArch64AsmParser::tryParseSVEPredicateVector(OperandVector &Operands) { @@ -3867,8 +4059,15 @@ return false; // Otherwise try for a scalar register. - if (tryParseGPROperand(Operands) == MatchOperand_Success) + if (tryParseGPROperand(Operands) == MatchOperand_Success) { + const auto &Op = static_cast(Operands.back().get()); + if (Op->isReg() && Op->getReg() == AArch64::ZT0 && + getTok().is(AsmToken::LBrac)) + // There's no comma after indexed ZT0 register, so we can parse the next + // operand immediately. + return parseOperand(Operands, false, false); return false; + } return true; } @@ -4079,6 +4278,11 @@ llvm_unreachable("Expected a valid vector kind"); } + if (RegTok.is(AsmToken::Identifier) && ParseRes == MatchOperand_NoMatch && + (RegTok.getString().startswith_insensitive("za") || + RegTok.getString().equals_insensitive("zt0"))) + return MatchOperand_NoMatch; + if (RegTok.isNot(AsmToken::Identifier) || ParseRes == MatchOperand_ParseFail || (ParseRes == MatchOperand_NoMatch && NoMatchIsError && @@ -4090,6 +4294,7 @@ return MatchOperand_NoMatch; }; + int NumRegs = getNumRegsForRegKind(VectorKind); SMLoc S = getLoc(); auto LCurly = getTok(); Lex(); // Eat left bracket token. 
@@ -4109,6 +4314,7 @@ int64_t PrevReg = FirstReg; unsigned Count = 1; + int Stride = 1; if (parseOptionalToken(AsmToken::Minus)) { SMLoc Loc = getLoc(); StringRef NextKind; @@ -4134,6 +4340,7 @@ Count += Space; } else { + bool HasCalculatedStride = false; while (parseOptionalToken(AsmToken::Comma)) { SMLoc Loc = getLoc(); StringRef NextKind; @@ -4148,10 +4355,18 @@ return MatchOperand_ParseFail; } - // Registers must be incremental (with wraparound at 31) - if (getContext().getRegisterInfo()->getEncodingValue(Reg) != - (getContext().getRegisterInfo()->getEncodingValue(PrevReg) + 1) % 32) { - Error(Loc, "registers must be sequential"); + unsigned RegVal = getContext().getRegisterInfo()->getEncodingValue(Reg); + unsigned PrevRegVal = + getContext().getRegisterInfo()->getEncodingValue(PrevReg); + if (!HasCalculatedStride) { + Stride = (PrevRegVal < RegVal) ? (RegVal - PrevRegVal) + : (RegVal + NumRegs - PrevRegVal); + HasCalculatedStride = true; + } + + // Register must be incremental (with a wraparound at last register). 
+ if (Stride == 0 || RegVal != ((PrevRegVal + Stride) % NumRegs)) { + Error(Loc, "registers must have the same sequential stride"); return MatchOperand_ParseFail; } @@ -4176,8 +4391,8 @@ } Operands.push_back(AArch64Operand::CreateVectorList( - FirstReg, Count, NumElements, ElementWidth, VectorKind, S, getLoc(), - getContext())); + FirstReg, Count, Stride, NumElements, ElementWidth, VectorKind, S, + getLoc(), getContext())); return MatchOperand_Success; } @@ -4601,7 +4816,8 @@ if (AOp1.isVectorList() && AOp2.isVectorList()) return AOp1.getVectorListCount() == AOp2.getVectorListCount() && - AOp1.getVectorListStart() == AOp2.getVectorListStart(); + AOp1.getVectorListStart() == AOp2.getVectorListStart() && + AOp1.getVectorListStride() == AOp2.getVectorListStride(); if (!AOp1.isReg() || !AOp2.isReg()) return false; @@ -5413,6 +5629,27 @@ return Error(Loc, "immediate must be an integer in range [1, 32]."); case Match_InvalidImm1_64: return Error(Loc, "immediate must be an integer in range [1, 64]."); + // For SME2 + case Match_InvalidMemoryIndexed8UImm3: + return Error(Loc, "index must be a multiple of 8 in range [0, 56]."); + case Match_InvalidMemoryIndexedRange2UImm0: + return Error(Loc, "vector select offset must be the immediate range 0:1."); + case Match_InvalidMemoryIndexedRange4UImm0: + return Error(Loc, "vector select offset must be the immediate range 0:3."); + case Match_InvalidMemoryIndexedRange4UImm1: + case Match_InvalidMemoryIndexedRange4UImm2: + return Error( + Loc, + "vector select offset must be an immediate range of the form " + ":, " + "where the first immediate is a multiple of 4 in the range [0, 4] or " + "[0, 12] " + "depending on the instruction, and the second immediate is immf + 3."); + case Match_InvalidMemoryIndexedRange2UImm1: + return Error(Loc, "vector select offset must be an immediate range of the " + "form :, where the first " + "immediate is a multiple of 2 in the range [0, 2], and " + "the second immediate is immf + 1."); case 
Match_InvalidMemoryIndexedRange2UImm2: case Match_InvalidMemoryIndexedRange2UImm3: return Error( @@ -5573,6 +5810,14 @@ return Error(Loc, "invalid predicate register."); case Match_InvalidSVEPredicate3bAnyReg: return Error(Loc, "invalid restricted predicate register, expected p0..p7 (without element suffix)"); + case Match_InvalidSMEPNPredicateB_p8_p15Reg: + case Match_InvalidSMEPNPredicateH_p8_p15Reg: + case Match_InvalidSMEPNPredicateS_p8_p15Reg: + case Match_InvalidSMEPNPredicateD_p8_p15Reg: + return Error(Loc, "Invalid predicate register, expected PN in range pn8..pn15 with element suffix."); + case Match_InvalidSMEPNPredicateAny_p8_p15Reg: + return Error(Loc, "invalid restricted predicate-as-counter register " + "expected pn8..pn15"); case Match_InvalidSVEExactFPImmOperandHalfOne: return Error(Loc, "Invalid floating point constant, expected 0.5 or 1.0."); case Match_InvalidSVEExactFPImmOperandHalfTwo: @@ -5604,6 +5849,10 @@ return Error(Loc, "invalid matrix operand, expected za[0-7].d"); case Match_InvalidMatrix: return Error(Loc, "invalid matrix operand, expected za"); + case Match_InvalidMatrix8: + return Error(Loc, "invalid matrix operand, expected suffix .b"); + case Match_InvalidMatrix16: + return Error(Loc, "invalid matrix operand, expected suffix .h"); case Match_InvalidMatrix32: return Error(Loc, "invalid matrix operand, expected suffix .s"); case Match_InvalidMatrix64: @@ -5626,6 +5875,26 @@ return Error(Loc, "Invalid vector list, expected list with 4 consecutive " "SVE vectors, where the first vector is a multiple of 4 " "and with matching element types"); + case Match_InvalidSVEVectorListStrided2x8: + case Match_InvalidSVEVectorListStrided2x16: + case Match_InvalidSVEVectorListStrided2x32: + case Match_InvalidSVEVectorListStrided2x64: + return Error( + Loc, + "Invalid vector list, expected list with each SVE vector in the list " + "8 registers apart, and the first register in the range [z0, z7] or " + "[z16, z23] and with correct element type"); + 
case Match_InvalidSVEVectorListStrided4x8: + case Match_InvalidSVEVectorListStrided4x16: + case Match_InvalidSVEVectorListStrided4x32: + case Match_InvalidSVEVectorListStrided4x64: + return Error( + Loc, + "Invalid vector list, expected list with each SVE vector in the list " + "4 registers apart, and the first register in the range [z0, z3] or " + "[z16, z19] and with correct element type"); + case Match_InvalidZT0: + return Error(Loc, "operand must be zt0 register"); default: llvm_unreachable("unexpected error code!"); } @@ -6067,6 +6336,12 @@ case Match_InvalidImm1_16: case Match_InvalidImm1_32: case Match_InvalidImm1_64: + case Match_InvalidMemoryIndexed8UImm3: + case Match_InvalidMemoryIndexedRange2UImm0: + case Match_InvalidMemoryIndexedRange4UImm0: + case Match_InvalidMemoryIndexedRange4UImm1: + case Match_InvalidMemoryIndexedRange4UImm2: + case Match_InvalidMemoryIndexedRange2UImm1: case Match_InvalidMemoryIndexedRange2UImm2: case Match_InvalidMemoryIndexedRange2UImm3: case Match_InvalidSVEAddSubImm8: @@ -6145,12 +6420,19 @@ case Match_InvalidSVEPredicateSReg: case Match_InvalidSVEPredicateDReg: case Match_InvalidSVEPredicate3bAnyReg: + case Match_InvalidSMEPNPredicateB_p8_p15Reg: + case Match_InvalidSMEPNPredicateH_p8_p15Reg: + case Match_InvalidSMEPNPredicateS_p8_p15Reg: + case Match_InvalidSMEPNPredicateD_p8_p15Reg: + case Match_InvalidSMEPNPredicateAny_p8_p15Reg: case Match_InvalidSVEExactFPImmOperandHalfOne: case Match_InvalidSVEExactFPImmOperandHalfTwo: case Match_InvalidSVEExactFPImmOperandZeroOne: case Match_InvalidMatrixTile32: case Match_InvalidMatrixTile64: case Match_InvalidMatrix: + case Match_InvalidMatrix8: + case Match_InvalidMatrix16: case Match_InvalidMatrix32: case Match_InvalidMatrix64: case Match_InvalidMatrixTileVectorH8: @@ -6163,6 +6445,7 @@ case Match_InvalidMatrixTileVectorV32: case Match_InvalidMatrixTileVectorV64: case Match_InvalidMatrixTileVectorV128: + case Match_InvalidZT0: case Match_InvalidSVCR: case 
Match_InvalidMatrixIndexGPR32_12_15: case Match_InvalidMatrixIndexGPR32_8_11: @@ -6174,6 +6457,14 @@ case Match_InvalidSVEVectorListMul4x16: case Match_InvalidSVEVectorListMul4x32: case Match_InvalidSVEVectorListMul4x64: + case Match_InvalidSVEVectorListStrided2x8: + case Match_InvalidSVEVectorListStrided2x16: + case Match_InvalidSVEVectorListStrided2x32: + case Match_InvalidSVEVectorListStrided2x64: + case Match_InvalidSVEVectorListStrided4x8: + case Match_InvalidSVEVectorListStrided4x16: + case Match_InvalidSVEVectorListStrided4x32: + case Match_InvalidSVEVectorListStrided4x64: case Match_MSR: case Match_MRS: { if (ErrorInfo >= Operands.size()) diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt --- a/llvm/lib/Target/AArch64/CMakeLists.txt +++ b/llvm/lib/Target/AArch64/CMakeLists.txt @@ -74,6 +74,7 @@ AArch64PBQPRegAlloc.cpp AArch64RegisterInfo.cpp AArch64SLSHardening.cpp + SMEPeepholeOpt.cpp AArch64SelectionDAGInfo.cpp AArch64SpeculationHardening.cpp AArch64StackTagging.cpp diff --git a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp --- a/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -123,6 +123,12 @@ static DecodeStatus DecodeZPR4Mul4RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeZPR2StridedRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeZPR4StridedRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); template static DecodeStatus DecodeMatrixTile(MCInst &Inst, unsigned RegNo, uint64_t Address, @@ -137,6 +143,9 @@ static DecodeStatus DecodePPR_3bRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const MCDisassembler *Decoder); +static DecodeStatus +DecodePPR_3b_p8_p15RegisterClass(MCInst 
&Inst, unsigned RegNo, uint64_t Address, + const MCDisassembler *Decoder); static DecodeStatus DecodeFixedPointScaleImm32(MCInst &Inst, unsigned Imm, uint64_t Address, @@ -335,6 +344,9 @@ case AArch64::MPR8RegClassID: MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZAB0)); break; + case AArch64::ZT0RRegClassID: + MI.insert(MI.begin() + i, MCOperand::createReg(AArch64::ZT0)); + break; } } else if (Desc.OpInfo[i].OperandType == AArch64::OPERAND_IMPLICIT_IMM_0) { @@ -653,6 +665,30 @@ return Success; } +static DecodeStatus DecodeZPR2StridedRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 15) + return Fail; + unsigned Register = + AArch64MCRegisterClasses[AArch64::ZPR2StridedRegClassID].getRegister( + RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + +static DecodeStatus DecodeZPR4StridedRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 7) + return Fail; + unsigned Register = + AArch64MCRegisterClasses[AArch64::ZPR4StridedRegClassID].getRegister( + RegNo); + Inst.addOperand(MCOperand::createReg(Register)); + return Success; +} + static DecodeStatus DecodeMatrixTileListRegisterClass(MCInst &Inst, unsigned RegMask, uint64_t Address, @@ -709,6 +745,16 @@ return DecodePPRRegisterClass(Inst, RegNo, Addr, Decoder); } +static DecodeStatus +DecodePPR_3b_p8_p15RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, + const MCDisassembler *Decoder) { + if (RegNo > 7) + return Fail; + + // Just reuse the PPR decode table + return DecodePPRRegisterClass(Inst, RegNo + 8, Addr, Decoder); +} + static DecodeStatus DecodeQQRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Addr, const MCDisassembler *Decoder) { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ 
b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -525,11 +525,14 @@ } bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { + auto IsScalable = [](const Type *T) { + return isa(T) || T->isAArch64SvcountTy(); + }; + auto &F = MF.getFunction(); - if (isa(F.getReturnType())) - return true; - if (llvm::any_of(F.args(), [](const Argument &A) { - return isa(A.getType()); + if (IsScalable(F.getReturnType()) || + llvm::any_of(F.args(), [&IsScalable](const Argument &A) { + return IsScalable(A.getType()); })) return true; const auto &ST = MF.getSubtarget(); @@ -537,6 +540,9 @@ LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n"); return true; } + SMEAttrs CallerAttrs(F); + if (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()) + return true; return false; } diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.h @@ -126,7 +126,7 @@ void printAMNoIndex(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); - template + template void printImmScale(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); @@ -182,6 +182,10 @@ const MCSubtargetInfo &STI, raw_ostream &O); void printSIMDType10Operand(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O); + template + void printPredicateAsCounter(const MCInst *MI, unsigned OpNum, + const MCSubtargetInfo &STI, raw_ostream &O); + template void printComplexRotationOp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ 
b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1193,6 +1193,26 @@ } } +template <int EltSize> +void AArch64InstPrinter::printPredicateAsCounter(const MCInst *MI, + unsigned OpNum, + const MCSubtargetInfo &STI, + raw_ostream &O) { + unsigned Reg = MI->getOperand(OpNum).getReg(); + + assert(Reg >= AArch64::P0 && Reg <= AArch64::P15 && "Unsupported predicate register"); + O << "pn" << (Reg - AArch64::P0); + switch (EltSize) { + case 0: break; + case 8: O << ".b"; break; + case 16: O << ".h"; break; + case 32: O << ".s"; break; + case 64: O << ".d"; break; + default: + llvm_unreachable("Unsupported element size"); + } +} + void AArch64InstPrinter::printCondCode(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { @@ -1215,12 +1235,15 @@ O << ']'; } -template<int Scale> +template <int Scale, bool PrintHash> void AArch64InstPrinter::printImmScale(const MCInst *MI, unsigned OpNum, const MCSubtargetInfo &STI, raw_ostream &O) { - O << markup("<imm:") << '#' - << formatImm(Scale * MI->getOperand(OpNum).getImm()) << markup(">"); + if (PrintHash) { + O << markup("<imm:") << '#' + << formatImm(Scale * MI->getOperand(OpNum).getImm()) << markup(">"); + } else + O << formatImm(Scale * MI->getOperand(OpNum).getImm()); } template @@ -1447,8 +1470,9 @@ // list). 
unsigned NumRegs = 1; if (MRI.getRegClass(AArch64::DDRegClassID).contains(Reg) || - MRI.getRegClass(AArch64::ZPR2RegClassID).contains(Reg) || - MRI.getRegClass(AArch64::QQRegClassID).contains(Reg)) + MRI.getRegClass(AArch64::ZPR2StridedRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::QQRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::ZPR2RegClassID).contains(Reg)) NumRegs = 2; else if (MRI.getRegClass(AArch64::DDDRegClassID).contains(Reg) || MRI.getRegClass(AArch64::ZPR3RegClassID).contains(Reg) || @@ -1456,9 +1480,16 @@ NumRegs = 3; else if (MRI.getRegClass(AArch64::DDDDRegClassID).contains(Reg) || MRI.getRegClass(AArch64::ZPR4RegClassID).contains(Reg) || - MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg)) + MRI.getRegClass(AArch64::QQQQRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::ZPR4StridedRegClassID).contains(Reg)) NumRegs = 4; + unsigned Stride = 1; + if (MRI.getRegClass(AArch64::ZPR2StridedRegClassID).contains(Reg)) + Stride = 8; + else if (MRI.getRegClass(AArch64::ZPR4StridedRegClassID).contains(Reg)) + Stride = 4; + // Now forget about the list and find out what the first register is. if (unsigned FirstReg = MRI.getSubReg(Reg, AArch64::dsub0)) Reg = FirstReg; @@ -1478,7 +1509,7 @@ if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg) && NumRegs > 1 && // Do not print the range when the last register is lower than the first. // Because it is a wrap-around register. 
- Reg < getNextVectorRegister(Reg, NumRegs - 1)) { + Reg < getNextVectorRegister(Reg, NumRegs - 1) && Stride == 1) { printRegName(O, Reg); O << LayoutSuffix; if (NumRegs > 1) { @@ -1489,9 +1520,12 @@ O << LayoutSuffix; } } else { - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { + for (unsigned i = 0; i < NumRegs; + ++i, Reg = getNextVectorRegister(Reg, Stride)) { // wrap-around sve register - if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg)) + if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg) || + MRI.getRegClass(AArch64::ZPR2RegClassID).contains(Reg) || + MRI.getRegClass(AArch64::ZPR4RegClassID).contains(Reg)) printRegName(O, Reg); else printRegName(O, Reg, AArch64::vreg); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -190,6 +190,17 @@ SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + uint32_t EncodePPR_3b_p8_p15(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + uint32_t EncodeZPR2StridedRegisterClass(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t EncodeZPR4StridedRegisterClass(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + uint32_t EncodeMatrixTileListRegisterClass(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; @@ -533,6 +544,34 @@ return RegVal / Multiple; } +uint32_t +AArch64MCCodeEmitter::EncodePPR_3b_p8_p15(const MCInst &MI, unsigned OpIdx, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + auto RegOpnd = MI.getOperand(OpIdx).getReg(); + return RegOpnd - AArch64::P8; +} + +uint32_t AArch64MCCodeEmitter::EncodeZPR2StridedRegisterClass( + const MCInst 
&MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + auto RegOpnd = MI.getOperand(OpIdx).getReg(); + unsigned RegVal = Ctx.getRegisterInfo()->getEncodingValue(RegOpnd); + unsigned T = (RegVal & 0x10) >> 1; + unsigned Zt = RegVal & 0x7; + return T | Zt; +} + +uint32_t AArch64MCCodeEmitter::EncodeZPR4StridedRegisterClass( + const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + auto RegOpnd = MI.getOperand(OpIdx).getReg(); + unsigned RegVal = Ctx.getRegisterInfo()->getEncodingValue(RegOpnd); + unsigned T = (RegVal & 0x10) >> 2; + unsigned Zt = RegVal & 0x3; + return T | Zt; +} + uint32_t AArch64MCCodeEmitter::EncodeMatrixTileListRegisterClass( const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp --- a/llvm/lib/Target/AArch64/SMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp @@ -43,6 +43,17 @@ private: bool updateNewZAFunctions(Module *M, Function *F, IRBuilder<> &Builder); + bool handleExceptions(IRBuilder<> &Builder, Function *F, StructType *TPITy); + AllocaInst *createZABufferAndTPIDR2Block(IRBuilder<> &Builder, Function *F, + StructType *TPITy, + StringRef ObjName); + void setupLazySave(IRBuilder<> &Builder, Instruction *Call, + AllocaInst *TPIObj, BasicBlock *CheckBB, + BasicBlock *ResumeBB, BasicBlock *RestoreBB); + void restoreLazySave(IRBuilder<> &Builder, Instruction *Call, + AllocaInst *TPIObj, BasicBlock *RestoreBB, + BasicBlock *ResumeBB = nullptr, + Value *PStateOnEntry = nullptr); }; } // end anonymous namespace @@ -59,16 +70,52 @@ // Utility functions //===----------------------------------------------------------------------===// +// Utility function to emit a call to __arm_sme_state and return 'pstate.sm' value. 
+Value *emitGetPStateSM(Module *M, IRBuilder<> &Builder) { + auto *FTy = FunctionType::get( + StructType::create({Builder.getInt64Ty(), Builder.getInt64Ty()}), {}, + /*IsVarArgs=*/false); + auto Attrs = + AttributeList() + .addFnAttribute(M->getContext(), "aarch64_pstate_sm_compatible") + .addFnAttribute(M->getContext(), "aarch64_pstate_za_preserved"); + FunctionCallee Callee = M->getOrInsertFunction("__arm_sme_state", FTy, Attrs); + CallInst *Call = Builder.CreateCall(Callee); + Call->setCallingConv( + CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2); + + // Extract PSTATE.SM from X0 + Value *X0 = Builder.CreateExtractValue(Call, 0); + return Builder.CreateAnd(X0, Builder.getInt64(1)); +} + // Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0. +void emitTPIDR2Restore(Module *M, IRBuilder<> &Builder, Value *TPIDR2Obj) { + auto *FTy = FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, + /*IsVarArgs=*/false); + auto Attrs = + AttributeList() + .addFnAttribute(M->getContext(), "aarch64_pstate_sm_compatible") + .addFnAttribute(M->getContext(), "aarch64_pstate_za_shared"); + FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_restore", FTy, Attrs); + CallInst *Call = Builder.CreateCall( + Callee, Builder.CreatePointerCast(TPIDR2Obj, Builder.getInt8PtrTy())); + Call->setCallingConv( + CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0); +} + void emitTPIDR2Save(Module *M, IRBuilder<> &Builder) { auto *TPIDR2SaveTy = FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false); - auto Attrs = - AttributeList::get(M->getContext(), 0, {"aarch64_pstate_sm_compatible"}); + AttributeList() + .addFnAttribute(M->getContext(), "aarch64_pstate_sm_compatible") + .addFnAttribute(M->getContext(), "aarch64_pstate_za_preserved"); FunctionCallee Callee = M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs); - Builder.CreateCall(Callee); + CallInst *Call = Builder.CreateCall(Callee); + 
Call->setCallingConv( + CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0); // A save to TPIDR2 should be followed by clearing TPIDR2_EL0. Function *WriteIntr = @@ -77,6 +124,271 @@ Builder.getInt64(0)); } +void SMEABI::restoreLazySave( + IRBuilder<> &Builder, Instruction *Call, AllocaInst *TPIObj, + BasicBlock *RestoreBB, BasicBlock *ResumeBB, Value *PStateOnEntry) { + Module *M = Call->getModule(); + + // If Call is an Invoke, restore the lazy save in the normal destination. + // Otherwise, restore the lazy save immediately after Call. + if (auto *II = dyn_cast(Call)) + Builder.SetInsertPoint(II->getNormalDest()->getFirstNonPHIOrDbg()); + else + Builder.SetInsertPoint(Call->getParent(), std::next(Call->getIterator())); + + // Re-enable pstate.za. + Function *EnableZAIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable); + Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr); + + // Create an intrinsic call to restore ZA, passing the 64-bit data pointer + // to the TPIDR2 block. 
+ if (ResumeBB) + Builder.SetInsertPoint(RestoreBB); + emitTPIDR2Restore(M, Builder, TPIObj); + if (ResumeBB) + Builder.CreateBr(ResumeBB); +} + +void SMEABI::setupLazySave(IRBuilder<> &Builder, Instruction *Call, + AllocaInst *TPIObj, BasicBlock *CheckBB, + BasicBlock *ResumeBB, BasicBlock *RestoreBB) { + Module *M = Call->getModule(); + Builder.SetInsertPoint(Call); + + // Store the number of live slices to the num_za_save_slices field + // of the TPIDR2 block + Function *LiveIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_live_za_slices); + auto *Live = + Builder.CreateCall(LiveIntr->getFunctionType(), LiveIntr, {}, "live"); + auto *Trunc = Builder.CreateTrunc(Live, Builder.getInt16Ty(), "live.trunc"); + auto *TPILive = Builder.CreateGEP(TPIObj->getAllocatedType(), TPIObj, + {Builder.getInt64(0), Builder.getInt32(1)}, + "tpidr2.obj.live"); + Builder.CreateStore(Trunc, TPILive); + + auto *PtrToInt = + Builder.CreatePtrToInt(TPIObj, Builder.getInt64Ty(), "tpi.int"); + + if (dyn_cast(Call)) { + // Restart pstate.za if this is an Invoke, as we may be setting up + // a lazy-save in the exception handler. + // TODO: This will start pstate.za unnecessarily if the parent block is + // not an unwind destination. It might be possible to improve this by + // creating a mapping of blocks to ZA/SM states. + Function *EnableZAIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_enable); + Builder.CreateCall(EnableZAIntr->getFunctionType(), EnableZAIntr); + } + + // Set TPIDR2_EL0 to the new object + Function *WriteIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_set_tpidr2); + Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr, PtrToInt); + + if (!CheckBB) { + // If no CheckBB block was passed in, Call should be an Invoke with shared + // or preserved ZA and we don't need to restore the lazy-save unless we + // catch an exception. Abandon the lazy-save before resuming the function + // at the normal destination. 
+ auto *Invoke = dyn_cast(Call); + assert(Invoke && "CheckBB has not been provided for restoring lazy-save."); + auto *FirstInst = Invoke->getNormalDest()->getFirstNonPHIOrDbg(); + auto *II = dyn_cast(FirstInst); + // Avoid setting TPIDR2_EL0 to null more than once. + if (!II || II->getIntrinsicID() != Intrinsic::aarch64_sme_set_tpidr2) { + Builder.SetInsertPoint(FirstInst); + Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr, + ConstantInt::get(Builder.getInt64Ty(), 0)); + } + return; + } + + // Check if the lazy save has been committed by the callee and should + // be restored. + Builder.SetInsertPoint(CheckBB); + Function *GetEL0Intr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_get_tpidr2); + auto *GetEL0 = Builder.CreateCall(GetEL0Intr->getFunctionType(), GetEL0Intr, + {}, "tpidr2"); + auto *Cmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, GetEL0, + ConstantInt::get(GetEL0->getType(), 0), "cmp"); + Builder.CreateCondBr(Cmp, RestoreBB, ResumeBB); + + // Conditionally store ZA after the call. + restoreLazySave(Builder, Call, TPIObj, RestoreBB, ResumeBB); + + // Ensure we only set TPIDR2_EL0 to null once. + auto *FirstInst = ResumeBB->getFirstNonPHIOrDbg(); + auto *II = dyn_cast(FirstInst); + if (II && II->getIntrinsicID() == Intrinsic::aarch64_sme_set_tpidr2) + return; + + // Set TPIDR2_EL0 to null before continuing with the rest of the function. + // This will already be null if ZA was restored above, but is necessary + // to abandon the lazy-save if the callee did not commit it. 
+ Builder.SetInsertPoint(FirstInst); + Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr, + ConstantInt::get(Builder.getInt64Ty(), 0)); + + return; +} + +AllocaInst *SMEABI::createZABufferAndTPIDR2Block( + IRBuilder<> &Builder, Function *F, StructType *TPITy, StringRef ObjName) { + Module *M = F->getParent(); + + Builder.SetInsertPoint(&*F->getEntryBlock().getFirstInsertionPt()); + auto *TPIObj = Builder.CreateAlloca(TPITy, nullptr, ObjName); + + // Allocate a buffer big enough to hold the max ZA size (SVL.B x SVL.B) + Function *NIntr = Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_cntsb); + auto *N = Builder.CreateCall(NIntr->getFunctionType(), NIntr, {}, "N"); + auto *NN = Builder.CreateMul(N, N, "NN"); + auto *Buffer = Builder.CreateAlloca(Builder.getInt8Ty(), NN, "buffer"); + Buffer->setAlignment(Align(16)); + + // Fill the za_save_buffer field of the new TPIDR2 block + auto *TPIBuffer = Builder.CreateGEP( + TPIObj->getAllocatedType(), TPIObj, + {Builder.getInt64(0), Builder.getInt32(0)}, ObjName + ".buffer"); + Builder.CreateStore(Buffer, TPIBuffer); + + return TPIObj; +} + +bool SMEABI::handleExceptions(IRBuilder<> &Builder, Function *F, + StructType *TPITy) { + Module *M = F->getParent(); + LLVMContext &Context = F->getContext(); + SmallVector Invokes; + SmallVector Resumes; + SmallVector BeginCatchCalls; + SMEAttrs FnAttrs(*F); + + AllocaInst *TPIObj = nullptr; + + for (BasicBlock &BB : F->getBasicBlockList()) { + for (Instruction &I : BB) { + if (auto *Invoke = dyn_cast(&I)) + Invokes.push_back(Invoke); + else if (auto *Resume = dyn_cast(&I)) + Resumes.push_back(Resume); + else if (auto *Call = dyn_cast(&I)) { + if (Call->getCalledFunction() && + Call->getCalledFunction()->getName() == "__cxa_begin_catch") + BeginCatchCalls.push_back(Call); + } + } + } + + for (InvokeInst *Invoke : Invokes) { + // We only need to set up and restore a lazy-save if there is ZA state. 
+ if (!FnAttrs.hasZAState()) + continue; + + if (!TPIObj) + TPIObj = + createZABufferAndTPIDR2Block(Builder, F, TPITy, "tpidr2.invoke.obj"); + + SMEAttrs InvokeAttrs(*Invoke); + if (InvokeAttrs.hasSharedZAInterface() || InvokeAttrs.preservesZA()) { + // If the Invoke instruction is shared or preserved ZA, setupLazySave + // does not need to restore the lazy-save at the normal destination. + setupLazySave(Builder, Invoke, TPIObj, nullptr, nullptr, nullptr); + } else { + // Otherwise, set up new blocks for restoring ZA if the Invoke was + // successful. Create a new block to read the value of TPIDR2 (CheckBB), + // which becomes the new normal destination for the instruction. + // Create another block to restore ZA (RestoreBB). ResumeBB is the + // original NormalDest for the Invoke. + auto *InvokeBB = Invoke->getParent(); + auto *ResumeBB = Invoke->getNormalDest(); + auto *CheckBB = BasicBlock::Create(Context, "check.za", F, ResumeBB); + Invoke->setNormalDest(CheckBB); + auto *RestoreBB = BasicBlock::Create(Context, "restore.za", F, ResumeBB); + // Update any successor PHI nodes to match the new blocks. + for (PHINode &PN : ResumeBB->phis()) { + PN.replaceIncomingBlockWith(InvokeBB, CheckBB); + PN.addIncoming(PN.getIncomingValueForBlock(CheckBB), RestoreBB); + } + ResumeBB->replaceSuccessorsPhiUsesWith(InvokeBB, ResumeBB); + // Set-up the lazy save for this Invoke. This also handles restoring the + // lazy save for the NormalDest. + setupLazySave(Builder, Invoke, TPIObj, CheckBB, ResumeBB, RestoreBB); + } + + if (!InvokeAttrs.hasNewZAInterface()) { + // New ZA functions are the only function types which will commit + // a lazy-save. For Invokes to any other type of function, create a + // call which saves ZA to ensure that we restore the correct state + // should the callee throw an unhandled exception that unwinds back + // to the caller. 
+ Builder.SetInsertPoint(Invoke); + emitTPIDR2Save(M, Builder); + + // Clear TPIDR2 to set ensure we are in the active ZA state + if (!InvokeAttrs.hasSharedZAInterface()) + continue; + Function *WriteIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_set_tpidr2); + Builder.CreateCall(WriteIntr->getFunctionType(), WriteIntr, + Builder.getInt64(0)); + } + } + + // We may need to restart streaming-mode depending on the starting value + // of pstate.sm on entry to this function (if streaming-compatible). + // Create a call to get this value in the entry block. + Value *PStateOnEntry = ConstantInt::get(Builder.getInt64Ty(), 1); + if (!Invokes.empty() && FnAttrs.hasStreamingCompatibleInterface()) { + Builder.SetInsertPoint(&*F->getEntryBlock().getFirstInsertionPt()); + PStateOnEntry = emitGetPStateSM(M, Builder); + } + + if (TPIObj) { + // Ensure we stop pstate.za before calling the resume instruction. + // TODO: We can improve this by not restoring the lazy-save & restarting + // pstate.za in the first place before blocks which terminate in a resume. + for (ResumeInst *Resume : Resumes) { + Builder.SetInsertPoint(Resume); + Function *DisableZAIntr = + Intrinsic::getDeclaration(M, Intrinsic::aarch64_sme_za_disable); + Builder.CreateCall(DisableZAIntr->getFunctionType(), DisableZAIntr); + } + } + + // Restore state at the beginning of each catch block. + for (Instruction *Catch : BeginCatchCalls) { + // Restore the lazy-save if there is ZA state. + if (FnAttrs.hasZAState()) + restoreLazySave(Builder, Catch, TPIObj, Catch->getParent(), + /*ResumeBB*/ nullptr, PStateOnEntry); + + // Restart streaming-mode in the catch block if necessary. + if (FnAttrs.requiresSMChange(SMEAttrs())) { + auto *FirstInst = Catch->getParent()->getFirstNonPHIOrDbg(); + // Ensure that any catch blocks which are also landing pads keep the + // LandingPadInst as the first instruction in the block. 
+ if (isa(FirstInst)) + FirstInst = FirstInst->getNextNonDebugInstruction(); + Builder.SetInsertPoint(FirstInst); + Function *EnableSMIntr = Intrinsic::getDeclaration( + M, Intrinsic::aarch64_sme_invoke_resume_pstatesm); + Builder.CreateCall(EnableSMIntr->getFunctionType(), EnableSMIntr, + PStateOnEntry); + } + } + + if (TPIObj || !FnAttrs.hasZAState()) { + F->addFnAttr("aarch64_expanded_pstate_za"); + return true; + } + + return false; +} + /// This function generates code to commit a lazy save at the beginning of a /// function marked with `aarch64_pstate_za_new`. If the value read from /// TPIDR2_EL0 is not null on entry to the function then the lazy-saving scheme @@ -132,6 +444,10 @@ LLVMContext &Context = F.getContext(); IRBuilder<> Builder(Context); + StructType *TPITy = + StructType::get(Context, {Builder.getInt8PtrTy(), Builder.getInt16Ty(), + ArrayType::get(Builder.getInt8Ty(), 6)}); + if (F.isDeclaration() || F.hasFnAttribute("aarch64_expanded_pstate_za")) return false; @@ -140,5 +456,8 @@ if (FnAttrs.hasNewZAInterface()) Changed |= updateNewZAFunctions(M, &F, Builder); + if (FnAttrs.hasZAState() || FnAttrs.requiresSMChange(SMEAttrs())) + Changed |= handleExceptions(Builder, &F, TPITy); + return Changed; } diff --git a/llvm/lib/Target/AArch64/SMEInstrFormats.td b/llvm/lib/Target/AArch64/SMEInstrFormats.td --- a/llvm/lib/Target/AArch64/SMEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SMEInstrFormats.td @@ -10,32 +10,177 @@ // //===----------------------------------------------------------------------===// -def imm_to_tile8 : ComplexPattern", []>; -def imm_to_tile16 : ComplexPattern", []>; -def imm_to_tile32 : ComplexPattern", []>; -def imm_to_tile64 : ComplexPattern", []>; -def imm_to_tile128 : ComplexPattern", []>; - -def tileslice8 : ComplexPattern", []>; -def tileslice16 : ComplexPattern", []>; -def tileslice32 : ComplexPattern", []>; -def tileslice64 : ComplexPattern", []>; -def tileslice128 : ComplexPattern", []>; // nop +def imm_to_tile8 : 
ComplexPattern", []>; +def imm_to_tile16 : ComplexPattern", []>; +def imm_to_tile32 : ComplexPattern", []>; +def imm_to_tile64 : ComplexPattern", []>; +def imm_to_tile128 : ComplexPattern", []>; + +def tileslice8 : ComplexPattern", []>; +def tileslice16 : ComplexPattern", []>; +def tileslice32 : ComplexPattern", []>; +def tileslice64 : ComplexPattern", []>; +def tileslice128 : ComplexPattern", []>; // nop + +def tileslicerange3s2 : ComplexPattern", []>; +def tileslicerange2s2 : ComplexPattern", []>; +def tileslicerange1s2 : ComplexPattern", []>; +def tileslicerange0s2 : ComplexPattern", []>; + +def tileslicerange2s4 : ComplexPattern", []>; +def tileslicerange1s4 : ComplexPattern", []>; +def tileslicerange0s4 : ComplexPattern", []>; def am_sme_indexed_b4 :ComplexPattern", [], [SDNPWantRoot]>; //===----------------------------------------------------------------------===// -// SME Outer Products +// SME Pseudo Classes //===----------------------------------------------------------------------===// -class sme_outer_product_pseudo - : Pseudo<(outs), (ins i64imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm, +def getSMEPseudoMap : InstrMapping { + let FilterClass = "SMEPseudo2Instr"; + let RowFields = ["PseudoName"]; + let ColFields = ["IsInstr"]; + let KeyCol = ["0"]; + let ValueCols = [["1"]]; +} + +class SMEPseudo2Instr { + string PseudoName = name; + bit IsInstr = instr; +} + +class sme_outer_product_pseudo + : Pseudo<(outs), (ins i32imm:$tile, PPR3bAny:$pn, PPR3bAny:$pm, zpr_ty:$zn, zpr_ty:$zm), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + +class sme2_za_array_2op_multi_single_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + +class sme2_za_array_2op_multi_multi_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins 
MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + +class sme2_za_array_2op_multi_index_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm3, multi_vector_ty:$Zn, zpr_ty:$Zm, imm_ty:$i), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + +class sme2_move_to_za_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins MatrixIndexGPR32Op8_11:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> { + let SMEMatrixType = za_flag; + let usesCustomInserter = 1; +} + +class sme2_move_to_tile_pseudo + : SMEPseudo2Instr, + Pseudo<(outs), (ins imm_ty:$tile, MatrixIndexGPR32Op12_15:$Rs, imm_ty:$imm, multi_vector_ty:$Zn), []> { + let SMEMatrixType = za_flag; let usesCustomInserter = 1; } +//===----------------------------------------------------------------------===// +// SME pattern match helpers. +//===----------------------------------------------------------------------===// + +class SME2_ZA_TwoOp_Multi_Single_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm), + (!cast(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm)>; + +class SME2_ZA_TwoOp_VG2_Multi_Single_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm), + (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), + zpr_ty:$Zm)>; + +class SME2_ZA_TwoOp_VG4_Multi_Single_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm), + (!cast(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), + zpr_ty:$Zm)>; + +class SME2_ZA_TwoOp_VG2_Multi_Multi_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm1, vt:$Zm2), + (!cast(name 
# _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), + (REG_SEQUENCE ZPR2Mul2, vt:$Zm1, zsub0, vt:$Zm2, zsub1))>; + +class SME2_ZA_TwoOp_VG4_Multi_Multi_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm1, vt:$Zm2, vt:$Zm3, vt:$Zm4), + (!cast(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), + (REG_SEQUENCE ZPR4Mul4, vt:$Zm1, zsub0, vt:$Zm2, zsub1, vt:$Zm3, zsub2, vt:$Zm4, zsub3))>; + +class SME2_ZA_TwoOp_Multi_Index_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn, vt:$Zm, (i32 imm_ty:$i)), + (!cast(name # _PSEUDO) $base, $offset, vt:$Zn, zpr_ty:$Zm, (i32 imm_ty:$i))>; + +class SME2_ZA_TwoOp_VG2_Multi_Index_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zm, (i32 imm_ty:$i)), + (!cast(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1), zpr_ty:$Zm, imm_ty:$i)>; + +class SME2_ZA_TwoOp_VG4_Multi_Index_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), + vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4, vt:$Zm, (i32 imm_ty:$i)), + (!cast(name # _PSEUDO) $base, $offset, + (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3), + zpr_ty:$Zm, imm_ty:$i)>; + +class SME2_Sat_Shift_VG2_Pat + : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, (i32 imm_ty:$i))), + (!cast(name) (REG_SEQUENCE ZPR2, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1), imm_ty:$i)>; + +class SME2_Sat_Shift_VG4_Pat + : Pat<(out_vt (intrinsic in_vt:$Zn1, in_vt:$Zn2, in_vt:$Zn3, in_vt:$Zn4, (i32 imm_ty:$i))), + (!cast(name) (REG_SEQUENCE ZPR4, in_vt:$Zn1, zsub0, in_vt:$Zn2, zsub1, in_vt:$Zn3, zsub2, in_vt:$Zn4, zsub3), + imm_ty:$i)>; + +class SME2_ZA_VG1x2_Multi_Pat + : Pat<(intrinsic (i32 (tileslice 
MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), + (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; + +class SME2_ZA_VG1x4_Multi_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op8_11:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), + (!cast(name # _PSEUDO) $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; + +class SME2_Tile_VG2_Multi_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2), + (!cast(name # _PSEUDO) 0, $base, $offset, (REG_SEQUENCE ZPR2Mul2, vt:$Zn1, zsub0, vt:$Zn2, zsub1))>; + +class SME2_Tile_VG4_Multi_Pat + : Pat<(intrinsic (i32 (tileslice MatrixIndexGPR32Op12_15:$base, index_ty:$offset)), vt:$Zn1, vt:$Zn2, vt:$Zn3, vt:$Zn4), + (!cast(name # _PSEUDO) 0, $base, $offset, (REG_SEQUENCE ZPR4Mul4, vt:$Zn1, zsub0, vt:$Zn2, zsub1, vt:$Zn3, zsub2, vt:$Zn4, zsub3))>; + +//===----------------------------------------------------------------------===// +// SME Outer Products +//===----------------------------------------------------------------------===// + class sme_fp_outer_product_inst : I<(outs za_ty:$ZAda), @@ -61,30 +206,30 @@ } multiclass sme_outer_product_fp32 { - def NAME : sme_fp_outer_product_inst { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), (nxv4f32 ZPR32:$zn), (nxv4f32 ZPR32:$zm)), - (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>; } multiclass sme_outer_product_fp64 { - def NAME : sme_fp_outer_product_inst { + def NAME : 
sme_fp_outer_product_inst, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), (nxv2f64 ZPR64:$zn), (nxv2f64 ZPR64:$zm)), - (!cast(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>; } class sme_int_outer_product_inst opc, string mnemonic, SDPatternOperator op> { def NAME : sme_int_outer_product_inst { + ZPR8, mnemonic>, SMEPseudo2Instr { bits<2> ZAda; let Inst{1-0} = ZAda; let Inst{2} = 0b0; } - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_3:$tile, (nxv16i1 PPR3bAny:$pn), (nxv16i1 PPR3bAny:$pm), (nxv16i8 ZPR8:$zn), (nxv16i8 ZPR8:$zm)), - (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>; } multiclass sme_int_outer_product_i64 opc, string mnemonic, SDPatternOperator op> { def NAME : sme_int_outer_product_inst { + ZPR16, mnemonic>, SMEPseudo2Instr { bits<3> ZAda; let Inst{2-0} = ZAda; } - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_7:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), (nxv8i16 ZPR16:$zn), (nxv8i16 ZPR16:$zm)), - (!cast(NAME # _PSEUDO) imm0_7:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_7:$tile, $pn, $pm, $zn, $zm)>; } class sme_outer_product_widening_inst @@ -170,23 +315,23 @@ } multiclass sme_bf16_outer_product { - def NAME : sme_outer_product_widening_inst<0b0, S, mnemonic>; + def NAME : 
sme_outer_product_widening_inst<0b0, S, mnemonic>, SMEPseudo2Instr; - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), (nxv8bf16 ZPR16:$zn), (nxv8bf16 ZPR16:$zm)), - (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>; } multiclass sme_f16_outer_product { - def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>; + def NAME : sme_outer_product_widening_inst<0b1, S, mnemonic>, SMEPseudo2Instr; - def NAME # _PSEUDO : sme_outer_product_pseudo; + def NAME # _PSEUDO : sme_outer_product_pseudo, SMEPseudo2Instr; - def : Pat<(op imm0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_3:$tile, (nxv8i1 PPR3bAny:$pn), (nxv8i1 PPR3bAny:$pm), (nxv8f16 ZPR16:$zn), (nxv8f16 ZPR16:$zm)), - (!cast(NAME # _PSEUDO) imm0_3:$tile, $pn, $pm, $zn, $zm)>; + (!cast(NAME # _PSEUDO) timm32_0_3:$tile, $pn, $pm, $zn, $zm)>; } //===----------------------------------------------------------------------===// @@ -214,51 +359,42 @@ let Constraints = "$ZAda = $_ZAda"; } -class sme_add_vector_to_tile_u32 - : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic> { - bits<2> ZAda; - let Inst{2} = 0b0; - let Inst{1-0} = ZAda; -} - -class sme_add_vector_to_tile_u64 - : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic> { - bits<3> ZAda; - let Inst{2-0} = ZAda; -} - -class sme_add_vector_to_tile_pseudo +class sme_add_vector_to_tile_pseudo : Pseudo<(outs), - (ins i64imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>, + (ins i32imm:$tile, PPR3bAny:$Pn, PPR3bAny:$Pm, zpr_ty:$Zn), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp + let SMEMatrixType = za_flag; let usesCustomInserter = 1; } -def ADDHA_MPPZ_PSEUDO_S : 
sme_add_vector_to_tile_pseudo; -def ADDVA_MPPZ_PSEUDO_S : sme_add_vector_to_tile_pseudo; +multiclass sme_add_vector_to_tile_u32 { + def NAME : sme_add_vector_to_tile_inst<0b0, V, TileOp32, ZPR32, mnemonic>, SMEPseudo2Instr { + bits<2> ZAda; + let Inst{2} = 0b0; + let Inst{1-0} = ZAda; + } + + def _PSEUDO_S : sme_add_vector_to_tile_pseudo, SMEPseudo2Instr; -def : Pat<(int_aarch64_sme_addha - imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), + def : Pat<(op timm32_0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), (nxv4i32 ZPR32:$zn)), - (ADDHA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>; -def : Pat<(int_aarch64_sme_addva - imm0_3:$tile, (nxv4i1 PPR3bAny:$pn), (nxv4i1 PPR3bAny:$pm), - (nxv4i32 ZPR32:$zn)), - (ADDVA_MPPZ_PSEUDO_S imm0_3:$tile, $pn, $pm, $zn)>; + (!cast(NAME # _PSEUDO_S) timm32_0_3:$tile, $pn, $pm, $zn)>; +} + +multiclass sme_add_vector_to_tile_u64 { + def NAME : sme_add_vector_to_tile_inst<0b1, V, TileOp64, ZPR64, mnemonic>, SMEPseudo2Instr { + bits<3> ZAda; + let Inst{2-0} = ZAda; + } -let Predicates = [HasSMEI16I64] in { -def ADDHA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo; -def ADDVA_MPPZ_PSEUDO_D : sme_add_vector_to_tile_pseudo; + def _PSEUDO_D : sme_add_vector_to_tile_pseudo, SMEPseudo2Instr; -def : Pat<(int_aarch64_sme_addha - imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), - (nxv2i64 ZPR64:$zn)), - (ADDHA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>; -def : Pat<(int_aarch64_sme_addva - imm0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), - (nxv2i64 ZPR64:$zn)), - (ADDVA_MPPZ_PSEUDO_D imm0_7:$tile, $pn, $pm, $zn)>; + let Predicates = [HasSMEI16I64] in { + def : Pat<(op timm32_0_7:$tile, (nxv2i1 PPR3bAny:$pn), (nxv2i1 PPR3bAny:$pm), + (nxv2i64 ZPR64:$zn)), + (!cast(NAME # _PSEUDO_D) timm32_0_7:$tile, $pn, $pm, $zn)>; + } } //===----------------------------------------------------------------------===// @@ -350,8 +486,8 @@ } class sme_load_pseudo - : Pseudo<(outs), (ins i64imm:$tile, 
MatrixIndexGPR32Op12_15:$idx, - i64imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>, + : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, + i32imm:$imm, PPR3bAny:$pg, GPR64sp:$base, GPR64:$offset), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let usesCustomInserter = 1; @@ -409,27 +545,27 @@ defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_B), !if(is_col, int_aarch64_sme_ld1b_vert, int_aarch64_sme_ld1b_horiz), - sme_elm_idx0_0, imm0_15, am_sve_regreg_lsl0, + sme_elm_idx0_0, timm32_0_15, am_sve_regreg_lsl0, tileslice8>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_H), !if(is_col, int_aarch64_sme_ld1h_vert, int_aarch64_sme_ld1h_horiz), - imm0_1, imm0_7, am_sve_regreg_lsl1, + timm32_0_1, timm32_0_7, am_sve_regreg_lsl1, tileslice16>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_S), !if(is_col, int_aarch64_sme_ld1w_vert, int_aarch64_sme_ld1w_horiz), - imm0_3, imm0_3, am_sve_regreg_lsl2, + timm32_0_3, timm32_0_3, am_sve_regreg_lsl2, tileslice32>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_D), !if(is_col, int_aarch64_sme_ld1d_vert, int_aarch64_sme_ld1d_horiz), - imm0_7, imm0_1, am_sve_regreg_lsl3, + timm32_0_7, timm32_0_1, am_sve_regreg_lsl3, tileslice64>; defm : sme_mem_ld_ss_patterns(NAME # _PSEUDO_Q), !if(is_col, int_aarch64_sme_ld1q_vert, int_aarch64_sme_ld1q_horiz), - imm0_15, sme_elm_idx0_0, am_sve_regreg_lsl4, + timm32_0_15, sme_elm_idx0_0, am_sve_regreg_lsl4, tileslice128>; } @@ -539,22 +675,22 @@ defm : sme_mem_st_ss_patterns(NAME # _B), !if(is_col, int_aarch64_sme_st1b_vert, int_aarch64_sme_st1b_horiz), - imm0_15, imm_to_tile8, am_sve_regreg_lsl0, + timm32_0_15, imm_to_tile8, am_sve_regreg_lsl0, tileslice8>; defm : sme_mem_st_ss_patterns(NAME # _H), !if(is_col, int_aarch64_sme_st1h_vert, int_aarch64_sme_st1h_horiz), - imm0_7, imm_to_tile16, am_sve_regreg_lsl1, + timm32_0_7, imm_to_tile16, am_sve_regreg_lsl1, tileslice16>; defm : sme_mem_st_ss_patterns(NAME # _S), !if(is_col, 
int_aarch64_sme_st1w_vert, int_aarch64_sme_st1w_horiz), - imm0_3, imm_to_tile32, am_sve_regreg_lsl2, + timm32_0_3, imm_to_tile32, am_sve_regreg_lsl2, tileslice32>; defm : sme_mem_st_ss_patterns(NAME # _D), !if(is_col, int_aarch64_sme_st1d_vert, int_aarch64_sme_st1d_horiz), - imm0_1, imm_to_tile64, am_sve_regreg_lsl3, + timm32_0_1, imm_to_tile64, am_sve_regreg_lsl3, tileslice64>; defm : sme_mem_st_ss_patterns(NAME # _Q), !if(is_col, int_aarch64_sme_st1q_vert, @@ -688,35 +824,33 @@ Operand offset_ty, SDPatternOperator op, ComplexPattern tileslice> { - def : Pat<(op imm_ty:$tile, MatrixIndexGPR32Op12_15:$idx, + def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, + offset_ty:$imm)), (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), - (inst imm_ty:$tile, $idx, 0, $pg, $zn)>; - let AddedComplexity = 1 in { - def : Pat<(op imm_ty:$tile, (i32 (tileslice MatrixIndexGPR32Op12_15:$idx, - offset_ty:$imm)), - (ppr_vt PPR3bAny:$pg), (zpr_vt ZPRAny:$zn)), - (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; - } + (inst imm_ty:$tile, $idx, $imm, $pg, $zn)>; } -class sme_mova_insert_pseudo - : Pseudo<(outs), (ins i64imm:$tile, MatrixIndexGPR32Op12_15:$idx, - i64imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, +class sme_mova_insert_pseudo + : Pseudo<(outs), (ins i32imm:$tile, MatrixIndexGPR32Op12_15:$idx, + i32imm:$imm, PPR3bAny:$pg, ZPRAny:$zn), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp + let SMEMatrixType = za_flag; let usesCustomInserter = 1; } multiclass sme_vector_v_to_tile { def _B : sme_vector_to_tile_inst<0b0, 0b00, !if(is_col, TileVectorOpV8, TileVectorOpH8), - is_col, sme_elm_idx0_15, ZPR8, mnemonic> { + is_col, sme_elm_idx0_15, ZPR8, mnemonic>, + SMEPseudo2Instr { bits<4> imm; let Inst{3-0} = imm; } def _H : sme_vector_to_tile_inst<0b0, 0b01, !if(is_col, TileVectorOpV16, TileVectorOpH16), - is_col, sme_elm_idx0_7, ZPR16, mnemonic> { + is_col, sme_elm_idx0_7, ZPR16, mnemonic>, + SMEPseudo2Instr { bits<1> ZAd; bits<3> imm; let 
Inst{3} = ZAd; @@ -724,7 +858,8 @@ } def _S : sme_vector_to_tile_inst<0b0, 0b10, !if(is_col, TileVectorOpV32, TileVectorOpH32), - is_col, sme_elm_idx0_3, ZPR32, mnemonic> { + is_col, sme_elm_idx0_3, ZPR32, mnemonic>, + SMEPseudo2Instr { bits<2> ZAd; bits<2> imm; let Inst{3-2} = ZAd; @@ -732,7 +867,8 @@ } def _D : sme_vector_to_tile_inst<0b0, 0b11, !if(is_col, TileVectorOpV64, TileVectorOpH64), - is_col, sme_elm_idx0_1, ZPR64, mnemonic> { + is_col, sme_elm_idx0_1, ZPR64, mnemonic>, + SMEPseudo2Instr { bits<3> ZAd; bits<1> imm; let Inst{3-1} = ZAd; @@ -740,7 +876,8 @@ } def _Q : sme_vector_to_tile_inst<0b1, 0b11, !if(is_col, TileVectorOpV128, TileVectorOpH128), - is_col, sme_elm_idx0_0, ZPR128, mnemonic> { + is_col, sme_elm_idx0_0, ZPR128, mnemonic>, + SMEPseudo2Instr { bits<4> ZAd; bits<1> imm; let Inst{3-0} = ZAd; @@ -748,11 +885,11 @@ // Pseudo instructions for lowering intrinsics, using immediates instead of // tile registers. - def _PSEUDO_B : sme_mova_insert_pseudo; - def _PSEUDO_H : sme_mova_insert_pseudo; - def _PSEUDO_S : sme_mova_insert_pseudo; - def _PSEUDO_D : sme_mova_insert_pseudo; - def _PSEUDO_Q : sme_mova_insert_pseudo; + def _PSEUDO_B : sme_mova_insert_pseudo, SMEPseudo2Instr; + def _PSEUDO_H : sme_mova_insert_pseudo, SMEPseudo2Instr; + def _PSEUDO_S : sme_mova_insert_pseudo, SMEPseudo2Instr; + def _PSEUDO_D : sme_mova_insert_pseudo, SMEPseudo2Instr; + def _PSEUDO_Q : sme_mova_insert_pseudo, SMEPseudo2Instr; defm : sme_vector_to_tile_aliases(NAME # _B), !if(is_col, TileVectorOpV8, @@ -779,28 +916,28 @@ int_aarch64_sme_write_horiz); defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_B), - nxv16i8, nxv16i1, sme_elm_idx0_0, imm0_15, + nxv16i8, nxv16i1, sme_elm_idx0_0, sme_elm_idx0_15, op, tileslice8>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), - nxv8i16, nxv8i1, sme_elm_idx0_1, imm0_7, + nxv8i16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), - nxv8f16, nxv8i1, sme_elm_idx0_1, 
imm0_7, + nxv8f16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_H), - nxv8bf16, nxv8i1, sme_elm_idx0_1, imm0_7, + nxv8bf16, nxv8i1, sme_elm_idx0_1, sme_elm_idx0_7, op, tileslice16>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), - nxv4i32, nxv4i1, sme_elm_idx0_3, imm0_3, + nxv4i32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, op, tileslice32>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_S), - nxv4f32, nxv4i1, sme_elm_idx0_3, imm0_3, + nxv4f32, nxv4i1, sme_elm_idx0_3, sme_elm_idx0_3, op, tileslice32>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), - nxv2i64, nxv2i1, sme_elm_idx0_7, imm0_1, + nxv2i64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, op, tileslice64>; defm : sme_vector_to_tile_patterns(NAME # _PSEUDO_D), - nxv2f64, nxv2i1, sme_elm_idx0_7, imm0_1, + nxv2f64, nxv2i1, sme_elm_idx0_7, sme_elm_idx0_1, op, tileslice64>; defvar opq = !if(is_col, int_aarch64_sme_writeq_vert, @@ -946,28 +1083,28 @@ int_aarch64_sme_read_horiz); defm : sme_tile_to_vector_patterns(NAME # _B), - nxv16i8, nxv16i1, imm0_15, + nxv16i8, nxv16i1, sme_elm_idx0_15, imm_to_tile8, tileslice8, op>; defm : sme_tile_to_vector_patterns(NAME # _H), - nxv8i16, nxv8i1, imm0_7, + nxv8i16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _H), - nxv8f16, nxv8i1, imm0_7, + nxv8f16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _H), - nxv8bf16, nxv8i1, imm0_7, + nxv8bf16, nxv8i1, sme_elm_idx0_7, imm_to_tile16, tileslice16, op>; defm : sme_tile_to_vector_patterns(NAME # _S), - nxv4i32, nxv4i1, imm0_3, + nxv4i32, nxv4i1, sme_elm_idx0_3, imm_to_tile32, tileslice32, op>; defm : sme_tile_to_vector_patterns(NAME # _S), - nxv4f32, nxv4i1, imm0_3, + nxv4f32, nxv4i1, sme_elm_idx0_3, imm_to_tile32, tileslice32, op>; defm : sme_tile_to_vector_patterns(NAME # _D), - nxv2i64, nxv2i1, imm0_1, + nxv2i64, nxv2i1, sme_elm_idx0_1, imm_to_tile64, 
tileslice64, op>; defm : sme_tile_to_vector_patterns(NAME # _D), - nxv2f64, nxv2i1, imm0_1, + nxv2f64, nxv2i1, sme_elm_idx0_1, imm_to_tile64, tileslice64, op>; defvar opq = !if(is_col, int_aarch64_sme_readq_vert, @@ -1038,14 +1175,14 @@ def : InstAlias<"zero\t\\{za0.s,za2.s,za3.s\\}", (!cast(NAME) 0b11011101), 1>; def : InstAlias<"zero\t\\{za1.s,za2.s,za3.s\\}", (!cast(NAME) 0b11101110), 1>; - def NAME # _PSEUDO : Pseudo<(outs), (ins i64imm:$tilelist), []>, + def NAME # _PSEUDO : Pseudo<(outs), (ins i32imm:$tilelist), []>, Sched<[]> { // Translated to the actual instructions in AArch64ISelLowering.cpp let usesCustomInserter = 1; } - def : Pat<(int_aarch64_sme_zero imm:$imm), - (!cast(NAME # _PSEUDO) imm:$imm)>; + def : Pat<(int_aarch64_sme_zero timm32_0_255:$imm), + (!cast(NAME # _PSEUDO) timm32_0_255:$imm)>; } //===----------------------------------------------------------------------===// @@ -1222,33 +1359,49 @@ let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_mla_add_sub_array_vg2_single_S op>{ +multiclass sme2_mla_add_sub_array_vg2_single_S op, SDPatternOperator intrinsic>{ def NAME : sme2_mla_add_sub_array_vg24_single<0b0, 0b0, op, MatrixOp32, ZZ_s, - ZPR4b32, mnemonic>; + ZPR4b32, mnemonic>, SMEPseudo2Instr; + + def NAME # _PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME) MatrixOp32:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_s:$Zn, ZPR4b32:$Zm), 0>; } -multiclass sme2_mla_add_sub_array_vg2_single_D op>{ +multiclass sme2_mla_add_sub_array_vg2_single_D op, SDPatternOperator intrinsic>{ def NAME : sme2_mla_add_sub_array_vg24_single<0b0, 0b1, op, MatrixOp64, - ZZ_d, ZPR4b64, mnemonic>; + ZZ_d, ZPR4b64, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME) MatrixOp64:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_d:$Zn, ZPR4b64:$Zm), 0>; } -multiclass 
sme2_mla_add_sub_array_vg4_single_S op>{ +multiclass sme2_mla_add_sub_array_vg4_single_S op, SDPatternOperator intrinsic>{ def NAME : sme2_mla_add_sub_array_vg24_single<0b1, 0b0, op, MatrixOp32, ZZZZ_s, - ZPR4b32, mnemonic>; + ZPR4b32, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME) MatrixOp32:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_s:$Zn, ZPR4b32:$Zm), 0>; } -multiclass sme2_mla_add_sub_array_vg4_single_D op>{ +multiclass sme2_mla_add_sub_array_vg4_single_D op, SDPatternOperator intrinsic>{ def NAME : sme2_mla_add_sub_array_vg24_single<0b1, 0b1, op, MatrixOp64, ZZZZ_d, - ZPR4b64, mnemonic>; + ZPR4b64, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME) MatrixOp64:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_d:$Zn, ZPR4b64:$Zm), 0>; @@ -1283,15 +1436,23 @@ let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_mla_add_sub_array_vg2_multi_S op>{ - def NAME : sme2_mla_add_sub_array_vg2_multi<0b0, op, MatrixOp32, ZZ_s_mul_r, mnemonic>; +multiclass sme2_mla_add_sub_array_vg2_multi_S op, SDPatternOperator intrinsic>{ + def NAME : sme2_mla_add_sub_array_vg2_multi<0b0, op, MatrixOp32, ZZ_s_mul_r, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME) MatrixOp32:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_s_mul_r:$Zn, ZZ_s_mul_r:$Zm), 0>; } -multiclass sme2_mla_add_sub_array_vg2_multi_D op>{ - def NAME : sme2_mla_add_sub_array_vg2_multi<0b1, op, MatrixOp64, ZZ_d_mul_r, mnemonic>; +multiclass sme2_mla_add_sub_array_vg2_multi_D op, SDPatternOperator intrinsic>{ + def NAME : sme2_mla_add_sub_array_vg2_multi<0b1, op, MatrixOp64, ZZ_d_mul_r, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def 
: SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME) MatrixOp64:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_d_mul_r:$Zn, ZZ_d_mul_r:$Zm), 0>; @@ -1324,15 +1485,23 @@ let Constraints = "$ZAd = $_ZAd"; } -multiclass sme2_mla_add_sub_array_vg4_multi_S op>{ - def NAME : sme2_mla_add_sub_array_vg4_multi_base<0b0, op, MatrixOp32, ZZZZ_s_mul_r, mnemonic>; +multiclass sme2_mla_add_sub_array_vg4_multi_S op, SDPatternOperator intrinsic>{ + def NAME : sme2_mla_add_sub_array_vg4_multi_base<0b0, op, MatrixOp32, ZZZZ_s_mul_r, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME) MatrixOp32:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_s_mul_r:$Zn, ZZZZ_s_mul_r:$Zm), 0>; } -multiclass sme2_mla_add_sub_array_vg4_multi_D op>{ - def NAME : sme2_mla_add_sub_array_vg4_multi_base<0b1, op, MatrixOp64, ZZZZ_d_mul_r, mnemonic>; +multiclass sme2_mla_add_sub_array_vg4_multi_D op, SDPatternOperator intrinsic>{ + def NAME : sme2_mla_add_sub_array_vg4_multi_base<0b1, op, MatrixOp64, ZZZZ_d_mul_r, mnemonic>, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME) MatrixOp64:$ZAd, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_d_mul_r:$Zn, ZZZZ_d_mul_r:$Zm), 0>; @@ -1422,9 +1591,10 @@ // SME2 Multi-vector - Multiple and Single SVE Destructive // Two and Four registers -class sme2_sqdmulh_add_vector_vg2_single sz, bits<6> op, - RegisterOperand vector_ty, - ZPRRegOp zpr_ty, string mnemonic> +class sme2_sve_destructive_vector_vg2_single sz, bits<7> op, + RegisterOperand vector_ty, + ZPRRegOp zpr_ty, + string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { @@ -1435,22 +1605,32 @@ let Inst{21-20} = 0b10; let Inst{19-16} = Zm; let Inst{15-11} = 0b10100; - let Inst{10-5} = op; + let Inst{10-9} = op{6-5}; + 
let Inst{8} = op{4}; //f + let Inst{7-5} = op{3-1}; let Inst{4-1} = Zdn; - let Inst{0} = 0b0; + let Inst{0} = op{0}; + let Constraints = "$Zdn = $_Zdn"; } -multiclass sme2_sqdmulh_add_vector_vg2_single op> { - def _B : sme2_sqdmulh_add_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>; - def _H : sme2_sqdmulh_add_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; - def _S : sme2_sqdmulh_add_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; - def _D : sme2_sqdmulh_add_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; +multiclass sme2_fp_sve_destructive_vector_vg2_single op> { + def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; } -class sme2_sqdmulh_add_vector_vg4_single sz, bits<6> op, - RegisterOperand vector_ty, - ZPRRegOp zpr_ty, string mnemonic> +multiclass sme2_int_sve_destructive_vector_vg2_single op> { + def _B : sme2_sve_destructive_vector_vg2_single<0b00, op, ZZ_b_mul_r, ZPR4b8, mnemonic>; + def _H : sme2_sve_destructive_vector_vg2_single<0b01, op, ZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sve_destructive_vector_vg2_single<0b10, op, ZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sve_destructive_vector_vg2_single<0b11, op, ZZ_d_mul_r, ZPR4b64, mnemonic>; +} + +class sme2_sve_destructive_vector_vg4_single sz, bits<7> op, + RegisterOperand vector_ty, + ZPRRegOp zpr_ty, + string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { @@ -1461,22 +1641,32 @@ let Inst{21-20} = 0b10; let Inst{19-16} = Zm; let Inst{15-11} = 0b10101; - let Inst{10-5} = op; + let Inst{10-9} = op{6-5}; + let Inst{8} = op{4}; //f + let Inst{7-5} = op{3-1}; let Inst{4-2} = Zdn; - let Inst{1-0} = 0b00; + let Inst{1} = 0b0; + let Inst{0} = op{0}; + let 
Constraints = "$Zdn = $_Zdn"; } -multiclass sme2_sqdmulh_add_vector_vg4_single op> { - def _B : sme2_sqdmulh_add_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>; - def _H : sme2_sqdmulh_add_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; - def _S : sme2_sqdmulh_add_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; - def _D : sme2_sqdmulh_add_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; +multiclass sme2_fp_sve_destructive_vector_vg4_single op> { + def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; } +multiclass sme2_int_sve_destructive_vector_vg4_single op> { + def _B : sme2_sve_destructive_vector_vg4_single<0b00, op, ZZZZ_b_mul_r, ZPR4b8, mnemonic>; + def _H : sme2_sve_destructive_vector_vg4_single<0b01, op, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sve_destructive_vector_vg4_single<0b10, op, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sve_destructive_vector_vg4_single<0b11, op, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; +} -class sme2_sqdmulh_vector_vg2_multi sz, RegisterOperand vector_ty, - string mnemonic> +class sme2_sve_destructive_vector_vg2_multi sz, bit f, bits<6> op, + RegisterOperand vector_ty, + string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { @@ -1486,21 +1676,32 @@ let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-17} = Zm; - let Inst{16-5} = 0b010110100000; + let Inst{16-11} = 0b010110; + let Inst{10-9} = op{5-4}; + let Inst{8} = f; + let Inst{7-5} = op{3-1}; let Inst{4-1} = Zdn; - let Inst{0} = 0b0; + let Inst{0} = op{0}; + let Constraints = "$Zdn = $_Zdn"; } -multiclass sme2_sqdmulh_vector_vg2_multi { - def _B : sme2_sqdmulh_vector_vg2_multi<0b00, ZZ_b_mul_r, mnemonic>; - def 
_H : sme2_sqdmulh_vector_vg2_multi<0b01, ZZ_h_mul_r, mnemonic>; - def _S : sme2_sqdmulh_vector_vg2_multi<0b10, ZZ_s_mul_r, mnemonic>; - def _D : sme2_sqdmulh_vector_vg2_multi<0b11, ZZ_d_mul_r, mnemonic>; +multiclass sme2_fp_sve_destructive_vector_vg2_multi op> { + def _H : sme2_sve_destructive_vector_vg2_multi<0b01, 0b1, op, ZZ_h_mul_r, mnemonic>; + def _S : sme2_sve_destructive_vector_vg2_multi<0b10, 0b1, op, ZZ_s_mul_r, mnemonic>; + def _D : sme2_sve_destructive_vector_vg2_multi<0b11, 0b1, op, ZZ_d_mul_r, mnemonic>; } -class sme2_sqdmulh_vector_vg4_multi sz, RegisterOperand vector_ty, - string mnemonic> +multiclass sme2_int_sve_destructive_vector_vg2_multi op> { + def _B : sme2_sve_destructive_vector_vg2_multi<0b00, 0b0, op, ZZ_b_mul_r, mnemonic>; + def _H : sme2_sve_destructive_vector_vg2_multi<0b01, 0b0, op, ZZ_h_mul_r, mnemonic>; + def _S : sme2_sve_destructive_vector_vg2_multi<0b10, 0b0, op, ZZ_s_mul_r, mnemonic>; + def _D : sme2_sve_destructive_vector_vg2_multi<0b11, 0b0, op, ZZ_d_mul_r, mnemonic>; +} + +class sme2_sve_destructive_vector_vg4_multi sz, bit f, bits<6> op, + RegisterOperand vector_ty, + string mnemonic> : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), mnemonic, "\t$Zdn, $_Zdn, $Zm", "", []>, Sched<[]> { @@ -1510,17 +1711,97 @@ let Inst{23-22} = sz; let Inst{21} = 0b1; let Inst{20-18} = Zm; - let Inst{17-5} = 0b0010111100000; + let Inst{17-11} = 0b0010111; + let Inst{10-9} = op{5-4}; + let Inst{8} = f; + let Inst{7-5} = op{3-1}; let Inst{4-2} = Zdn; - let Inst{1-0} = 0b00; + let Inst{1} = 0b0; + let Inst{0} = op{0}; + let Constraints = "$Zdn = $_Zdn"; } -multiclass sme2_sqdmulh_vector_vg4_multi { - def _B : sme2_sqdmulh_vector_vg4_multi<0b00, ZZZZ_b_mul_r, mnemonic>; - def _H : sme2_sqdmulh_vector_vg4_multi<0b01, ZZZZ_h_mul_r, mnemonic>; - def _S : sme2_sqdmulh_vector_vg4_multi<0b10, ZZZZ_s_mul_r, mnemonic>; - def _D : sme2_sqdmulh_vector_vg4_multi<0b11, ZZZZ_d_mul_r, mnemonic>; +multiclass 
sme2_fp_sve_destructive_vector_vg4_multi op> { + def _H : sme2_sve_destructive_vector_vg4_multi<0b01, 0b1, op, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_sve_destructive_vector_vg4_multi<0b10, 0b1, op, ZZZZ_s_mul_r, mnemonic>; + def _D : sme2_sve_destructive_vector_vg4_multi<0b11, 0b1, op, ZZZZ_d_mul_r, mnemonic>; +} + +multiclass sme2_int_sve_destructive_vector_vg4_multi op> { + def _B : sme2_sve_destructive_vector_vg4_multi<0b00, 0b0, op, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_sve_destructive_vector_vg4_multi<0b01, 0b0, op, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_sve_destructive_vector_vg4_multi<0b10, 0b0, op, ZZZZ_s_mul_r, mnemonic>; + def _D : sme2_sve_destructive_vector_vg4_multi<0b11, 0b0, op, ZZZZ_d_mul_r, mnemonic>; +} + +//===----------------------------------------------------------------------===// +// SME2 multi-vec CLAMP registers + +class sme2_clamp_vector_vg24_multi sz, bits<2> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> + : I<(outs multi_vector_ty:$Zd), + (ins multi_vector_ty:$_Zd, vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]>{ + bits<5> Zm; + bits<5> Zn; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-12} = 0b1100; + let Inst{11-10} = op1; + let Inst{9-5} = Zn; + let Inst{0} = u; + + let Constraints = "$Zd = $_Zd"; +} + +class sme2_clamp_vector_vg2_multi sz, bits<2> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> + : sme2_clamp_vector_vg24_multi{ + bits<4> Zd; + let Inst{4-1} = Zd; +} + +multiclass sme2_fp_clamp_vector_vg2_multi{ + def _H : sme2_clamp_vector_vg2_multi<0b01, 0b00, 0b0, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg2_multi<0b10, 0b00, 0b0, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg2_multi<0b11, 0b00, 0b0, ZZ_d_mul_r, ZPR64, mnemonic>; +} + +multiclass sme2_int_clamp_vector_vg2_multi{ + def _B : 
sme2_clamp_vector_vg2_multi<0b00, 0b01, u, ZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_clamp_vector_vg2_multi<0b01, 0b01, u, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg2_multi<0b10, 0b01, u, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg2_multi<0b11, 0b01, u, ZZ_d_mul_r, ZPR64, mnemonic>; +} + +class sme2_clamp_vector_vg4_multi sz, bits<2> op1, bit u, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, string mnemonic> + : sme2_clamp_vector_vg24_multi{ + bits<3> Zd; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; +} + +multiclass sme2_fp_clamp_vector_vg4_multi{ + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b10, 0b0, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b10, 0b0, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b10, 0b0, ZZZZ_d_mul_r, ZPR64, mnemonic>; +} + +multiclass sme2_int_clamp_vector_vg4_multi{ + def _B : sme2_clamp_vector_vg4_multi<0b00, 0b11, u, ZZZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_clamp_vector_vg4_multi<0b01, 0b11, u, ZZZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_clamp_vector_vg4_multi<0b10, 0b11, u, ZZZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_clamp_vector_vg4_multi<0b11, 0b11, u, ZZZZ_d_mul_r, ZPR64, mnemonic>; } //===----------------------------------------------------------------------===// @@ -1530,7 +1811,7 @@ RegisterOperand multi_vector_ty, string mnemonic, string vg_acronym=""> : I<(outs MatrixOp32:$ZAda), - (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH:$i3), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, index_ty:$imm, multi_vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), mnemonic, "\t$ZAda[$Rv, $imm" # !if(!eq(vg_acronym, ""), "", ", " # vg_acronym) # "], $Zn, $Zm$i3", "", []>, Sched<[]> { bits<4> Zm; @@ -1547,9 +1828,9 @@ let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_long_array_index op0, bits<2> op> { +multiclass 
sme2_mla_long_array_index op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_index_base { + mnemonic>, SMEPseudo2Instr { bits<3> i3; bits<5> Zn; bits<3> imm; @@ -1558,8 +1839,13 @@ let Inst{9-5} = Zn; let Inst{2-0} = imm; } + + def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_Multi_Index_Pat; } + class sme2_mla_long_array_vg2_index op0, bits<2> op> : sme2_mla_long_array_index_base { @@ -1574,18 +1860,26 @@ let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_index op> { - def _S : sme2_mla_long_array_vg2_index; +multiclass sme2_fp_mla_long_array_vg2_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i3), 0>; + (!cast(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } -multiclass sme2_int_mla_long_array_vg2_index op> { - def _S : sme2_mla_long_array_vg2_index; +multiclass sme2_int_mla_long_array_vg2_index op, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg2_index, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i3), 0>; + (!cast(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } class sme2_mla_long_array_vg4_index op0, bits<2> op> @@ -1602,18 +1896,26 @@ let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg4_index op> { - def _S : sme2_mla_long_array_vg4_index; +multiclass 
sme2_fp_mla_long_array_vg4_index op, ValueType zpr_ty, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i3), 0>; + (!cast(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } -multiclass sme2_int_mla_long_array_vg4_index op> { - def _S : sme2_mla_long_array_vg4_index; +multiclass sme2_int_mla_long_array_vg4_index op, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg4_index, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i3), 0>; + (!cast(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH32b_timm:$i3), 0>; } class sme2_mla_long_arrayop0, bits<2> op, Operand index_ty, @@ -1638,9 +1940,9 @@ let Constraints = "$ZAda = $_ZAda"; } -multiclass sme2_mla_long_array_single op0, bits<2> op> { +multiclass sme2_mla_long_array_single op0, bits<2> op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array { + mnemonic>, SMEPseudo2Instr { bits<4> Zm; bits<5> Zn; bits<3> imm; @@ -1649,8 +1951,13 @@ let Inst{9-5} = Zn; let Inst{2-0} = imm; } + + def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_Multi_Single_Pat; } + class sme2_mla_long_array_vg24_single op0, bit vg4, bits<2> op, RegisterOperand first_vector_ty, string mnemonic, string vg_acronym> @@ -1666,33 +1973,49 @@ let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_single op> { +multiclass 
sme2_fp_mla_long_array_vg2_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_vg24_single<0b00, 0b0, op, ZZ_h, mnemonic, - "vgx2">; + "vgx2">, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -multiclass sme2_int_mla_long_array_vg2_single op> { +multiclass sme2_int_mla_long_array_vg2_single op, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_vg24_single<0b01, 0b0, op, ZZ_h, mnemonic, - "vgx2">; + "vgx2">, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -multiclass sme2_fp_mla_long_array_vg4_single op> { +multiclass sme2_fp_mla_long_array_vg4_single op, ValueType zpr_ty, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_vg24_single<0b00, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">; + "vgx4">, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; } -multiclass sme2_int_mla_long_array_vg4_single op> { +multiclass sme2_int_mla_long_array_vg4_single op, SDPatternOperator intrinsic> { def _S : sme2_mla_long_array_vg24_single<0b01, 0b1, op, ZZZZ_h, mnemonic, - "vgx4">; + "vgx4">, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_single_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Single_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; @@ -1711,15 +2034,23 @@ let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg2_multi op> { - def _S : 
sme2_mla_long_array_vg2_multi; +multiclass sme2_fp_mla_long_array_vg2_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; } -multiclass sme2_int_mla_long_array_vg2_multi op> { - def _S : sme2_mla_long_array_vg2_multi; +multiclass sme2_int_mla_long_array_vg2_multi op, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg2_multi, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; @@ -1740,15 +2071,23 @@ let Inst{1-0} = imm; } -multiclass sme2_fp_mla_long_array_vg4_multi op> { - def _S : sme2_mla_long_array_vg4_multi; +multiclass sme2_fp_mla_long_array_vg4_multi op, ValueType zpr_ty, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; } -multiclass sme2_int_mla_long_array_vg4_multi op> { - def _S : sme2_mla_long_array_vg4_multi; +multiclass sme2_int_mla_long_array_vg4_multi op, SDPatternOperator intrinsic> { + def _S : sme2_mla_long_array_vg4_multi, SMEPseudo2Instr; + + def _S_PSEUDO : sme2_za_array_2op_multi_multi_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Multi_Pat; def : InstAlias(NAME #_S) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s2range:$imm2, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; @@ -1794,3 +2133,2711 @@ def _StoB : sme2_cvt_vg4_single<0b0, op, ZPR8, ZZZZ_s_mul_r, mnemonic>; def _DtoH : 
sme2_cvt_vg4_single<0b1, op, ZPR16, ZZZZ_d_mul_r, mnemonic>; } + +class sme2_fp_cvt_vg2_multi op> + : I<(outs ZZ_s_mul_r:$Zd), (ins ZZ_s_mul_r:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<4> Zn; + bits<4> Zd; + let Inst{31-18} = 0b11000001001000; + let Inst{17-16} = op{2-1}; + let Inst{15-10} = 0b111000; + let Inst{9-6} = Zn; + let Inst{5} = op{0}; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + + +class sme2_fp_cvt_vg4_multi op> + : I<(outs ZZZZ_s_mul_r:$Zd), (ins ZZZZ_s_mul_r:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + bits<3> Zn; + bits<3> Zd; + let Inst{31-18} = 0b11000001001100; + let Inst{17-16} = op{2-1}; + let Inst{15-10} = 0b111000; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + let Inst{5} = op{0}; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +//===----------------------------------------------------------------------===// +// SME2 Dot Products and MLA + +// SME2 multi-vec ternary indexed two registers 32-bit +class sme2_multi_vec_array_vg2_index_32b opc2, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, + string mnemonic> + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i2), + mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i2", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<2> i2; + bits<4> Zn; + bits<3> imm3; + let Inst{31-20} = 0b110000010101; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12} = op; + let Inst{11-10} = i2; + let Inst{9-6} = Zn; + let Inst{5-3} = opc2; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmla_array_vg2_index opc, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg2_index_32b, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, 
sme_elm_idx0_7 +:$imm3, ZZ_s_mul_r:$Zn, ZPR4b32:$Zm, VectorIndexS32b_timm:$i2), 0>; +} + +multiclass sme2_dot_array_vg2_index_HToS opc> { + def NAME : sme2_multi_vec_array_vg2_index_32b; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexS32b_timm:$i2), 0>; +} +multiclass sme2_dot_array_vg2_index_BToS opc> { + def NAME : sme2_multi_vec_array_vg2_index_32b; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i2), 0>; +} + +// SME2 multi-vec ternary indexed two registers 64-bit + +class sme2_multi_vec_array_vg2_index_64b opc, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, + string mnemonic> + : I<(outs MatrixOp64:$ZAda), + (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), + mnemonic, "\t$ZAda[$Rv, $imm3, vgx2], $Zn, $Zm$i1", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<1> i1; + bits<4> Zn; + bits<3> imm3; + let Inst{31-20} = 0b110000011101; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-11} = 0b00; + let Inst{10} = i1; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-3} = opc; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmla_array_vg2_index_D opc, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg2_index_64b, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG2_Multi_Index_Pat; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_d_mul_r:$Zn, ZPR4b64:$Zm, VectorIndexD32b_timm:$i1), 0>; +} + +multiclass sme2_dot_array_vg2_index opc> { + def NAME : sme2_multi_vec_array_vg2_index_64b; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h_mul_r:$Zn, 
ZPR4b16:$Zm, VectorIndexD32b_timm:$i1), 0>; +} + +// SME2 multi-vec ternary indexed four registers 32-bit + +class sme2_multi_vec_array_vg4_index_32b opc2, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, + string mnemonic> + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexS32b_timm:$i2), + mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i2", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<2> i2; + bits<3> Zn; + bits<3> imm3; + let Inst{31-20} = 0b110000010101; + let Inst{19-16} = Zm; + let Inst{15} = 0b1; + let Inst{14-13} = Rv; + let Inst{12} = op; + let Inst{11-10} = i2; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + let Inst{5-3} = opc2; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmla_array_vg4_index_S opc, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg4_index_32b, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_s_mul_r:$Zn, ZPR4b32:$Zm, VectorIndexS32b_timm:$i2), 0>; +} + +multiclass sme2_dot_array_vg4_index_BtoS opc> { + def NAME : sme2_multi_vec_array_vg4_index_32b; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexS32b_timm:$i2), 0>; +} + +multiclass sme2_dot_array_vg4_index_HtoS opc> { + def NAME : sme2_multi_vec_array_vg4_index_32b; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexS32b_timm:$i2), 0>; +} + +// SME2 multi-vec ternary indexed four registers 64-bit +class sme2_multi_vec_array_vg4_index_64b opc2, + RegisterOperand multi_vector_ty, + ZPRRegOp vector_ty, + string mnemonic> + : I<(outs MatrixOp64:$ZAda), + (ins MatrixOp64:$_ZAda, 
MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, vector_ty:$Zm, VectorIndexD32b_timm:$i1), + mnemonic, "\t$ZAda[$Rv, $imm3, vgx4], $Zn, $Zm$i1", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<1> i1; + bits<3> Zn; + bits<3> imm3; + let Inst{31-20} = 0b110000011101; + let Inst{19-16} = Zm; + let Inst{15} = 0b1; + let Inst{14-13} = Rv; + let Inst{12} = 0b0; + let Inst{11} = op; + let Inst{10} = i1; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; + let Inst{4-3} = opc2; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fmla_array_vg4_index_D opc, SDPatternOperator intrinsic> { + def NAME : sme2_multi_vec_array_vg4_index_64b, SMEPseudo2Instr; + + def _PSEUDO : sme2_za_array_2op_multi_index_pseudo; + + def : SME2_ZA_TwoOp_VG4_Multi_Index_Pat; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_d_mul_r:$Zn, ZPR4b64:$Zm, VectorIndexD32b_timm:$i1), 0>; +} + +multiclass sme2_dot_array_vg4_index opc> { + def _D : sme2_multi_vec_array_vg4_index_64b; + + def : InstAlias(NAME # _D) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexD32b_timm:$i1), 0>; +} + +class sme2_fp_dot_array_vg24_single + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, ZPR4b16:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # vg_acronym # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<5> Zn; + bits<3> imm3; + let Inst{31-21} = 0b11000001001; + let Inst{20} = vg4; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b100; + let Inst{9-5} = Zn; + let Inst{4} = op; + let Inst{3} = 0b0; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fp_dot_array_vg2_single{ + def NAME : sme2_fp_dot_array_vg24_single<0b0, op, ZZ_h, mnemonic, "vgx2">; + + def : InstAlias(NAME) 
MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +multiclass sme2_fp_dot_array_vg4_single{ + def NAME : sme2_fp_dot_array_vg24_single<0b1, op, ZZZZ_h, mnemonic, "vgx4">; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +class sme2_fp_dot_array_vg24_multi + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, multi_vector_ty:$Zn, multi_vector_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # vg_acronym # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> Rv; + bits<3> imm3; + let Inst{31-21} = 0b11000001101; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b100; + let Inst{5} = 0b0; + let Inst{4} = op; + let Inst{3} = 0b0; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_fp_dot_array_vg2_multi { + def NAME : sme2_fp_dot_array_vg24_multi{ + bits<4> Zm; + bits<4> Zn; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; +} + +multiclass sme2_fp_dot_array_vg4_multi { + def NAME : sme2_fp_dot_array_vg24_multi{ + bits<3> Zm; + bits<3> Zn; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; +} + +class sme2_int_dot_array_vg24_single_2way + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<5> Zn; + bits<3> imm3; + let Inst{31-23} = 0b110000010; + let Inst{22} = op; + let Inst{21} = 0b1; + let 
Inst{20} = vg4; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b101; + let Inst{9-5} = Zn; + let Inst{4} = u; + let Inst{3} = 0b1; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +// SME2 single-multi two-way dot product two registers +multiclass sme2_int_dot_array_vg2_single_HtoS { + def NAME: sme2_int_dot_array_vg24_single_2way<0b1, 0b0, u, ZZ_h, ZPR4b16, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +// SME2 single-multi two-way dot product four registers + +multiclass sme2_int_dot_array_vg4_single_HtoS { + def NAME: sme2_int_dot_array_vg24_single_2way<0b1, 0b1, u, ZZZZ_h, ZPR4b16, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg2_single { + def _BtoS: sme2_int_dot_array_vg24_single_2way<0b0, 0b0, u, ZZ_b, ZPR4b8, mnemonic>; + + def : InstAlias(NAME # _BtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg4_single { + def _BtoS: sme2_int_dot_array_vg24_single_2way<0b0, 0b1, u, ZZZZ_b, ZPR4b8, mnemonic>; + + def : InstAlias(NAME # _BtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} + +class sme2_int_dot_array_vg24_single_4way + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<5> Zn; + bits<3> imm3; + let Inst{31-23} = 0b110000010; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20} = vg4; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b101; + let Inst{9-5} = Zn; + let Inst{4} = 
u; + let Inst{3} = 0b0; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +// SME2 single-multi four-way dot product two registers + +multiclass sme2_int_dot_array_vg2_single_BtoS { + def NAME : sme2_int_dot_array_vg24_single_4way<0b0, 0b0, u, MatrixOp32, ZZ_b, ZPR4b8, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg2_single_HtoD { + def NAME : sme2_int_dot_array_vg24_single_4way<0b1, 0b0, u, MatrixOp64, ZZ_h, ZPR4b16, mnemonic>; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +// SME2 single-multi four-way dot product four registers + +multiclass sme2_int_dot_array_vg4_single_BtoS { + def NAME : sme2_int_dot_array_vg24_single_4way<0b0, 0b1, u, MatrixOp32, ZZZZ_b, ZPR4b8, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} +multiclass sme2_int_dot_array_vg4_single_HtoD { + def NAME : sme2_int_dot_array_vg24_single_4way<0b1, 0b1, u, MatrixOp64, ZZZZ_h, ZPR4b16, mnemonic>; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +class sme2_int_dot_array_vg24_multi_2way + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, multi_vector_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # vg_acronym # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> Rv; + bits<3> imm3; + let Inst{31-23} = 0b110000011; + let Inst{22} = op0; + let Inst{21} = 0b1; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b101; + let Inst{5} = 0b0; + let Inst{4} = u; + let Inst{3} = 0b1; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + + +// SME2 multi two-way dot product two registers + +multiclass 
sme2_int_dot_array_vg2_multi_HtoS { + def NAME : sme2_int_dot_array_vg24_multi_2way<0b1, u, ZZ_h_mul_r, mnemonic, + "vgx2"> { + bits<4> Zm; + bits<4> Zn; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; + +} + +multiclass sme2_int_dot_array_vg2_multi { + def _BtoS : sme2_int_dot_array_vg24_multi_2way<0b0, 0b0, ZZ_b_mul_r, mnemonic, + "vgx2"> { + bits<4> Zm; + bits<4> Zn; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME # _BtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZZ_b_mul_r:$Zm), 0>; + +} + +// SME2 multi two-way dot product four registers + +multiclass sme2_int_dot_array_vg4_multi_HtoS { + def NAME : sme2_int_dot_array_vg24_multi_2way<0b1, u, ZZZZ_h_mul_r, mnemonic, + "vgx4"> { + bits<3> Zm; + bits<3> Zn; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg4_multi { + def _BtoS : sme2_int_dot_array_vg24_multi_2way<0b0, 0b0, ZZZZ_b_mul_r, mnemonic, + "vgx4"> { + bits<3> Zm; + bits<3> Zn; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME # _BtoS) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZZZZ_b_mul_r:$Zm), 0>; +} + +class sme2_int_dot_array_vg24_multi_4way + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, + multi_vector_ty:$Zn, multi_vector_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm3, " # vg_acronym # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> Rv; + bits<3> imm3; + let Inst{31-23} = 0b110000011; + let Inst{22} = sz; + let Inst{21} = 0b1; 
+ let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b101; + let Inst{5} = 0b0; + let Inst{4} = u; + let Inst{3} = 0b0; + let Inst{2-0} = imm3; + + let Constraints = "$ZAda = $_ZAda"; +} + +// SME2 multi four-way dot product two registers + +multiclass sme2_int_dot_array_vg2_multi_BtoS { + def NAME : sme2_int_dot_array_vg24_multi_4way<0b0, u, MatrixOp32, ZZ_b_mul_r, + mnemonic, "vgx2"> { + bits<4> Zm; + bits<4> Zn; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + } + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_b_mul_r:$Zn, ZZ_b_mul_r:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg2_multi_HtoD { + def NAME : sme2_int_dot_array_vg24_multi_4way<0b1, u, MatrixOp64, ZZ_h_mul_r, + mnemonic, "vgx2"> { + bits<4> Zm; + bits<4> Zn; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; +} + +// SME2 multi four-way dot product four registers +multiclass sme2_int_dot_array_vg4_multi_BtoS { + def NAME : sme2_int_dot_array_vg24_multi_4way<0b0, u, MatrixOp32, ZZZZ_b_mul_r, + mnemonic, "vgx4"> { + bits<3> Zm; + bits<3> Zn; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_b_mul_r:$Zn, ZZZZ_b_mul_r:$Zm), 0>; +} + +multiclass sme2_int_dot_array_vg4_multi_HtoD { + def NAME : sme2_int_dot_array_vg24_multi_4way<0b1, u, MatrixOp64, ZZZZ_h_mul_r, + mnemonic, "vgx4"> { + bits<3> Zm; + bits<3> Zn; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, sme_elm_idx0_7:$imm3, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; +} + +//===----------------------------------------------------------------------===// +// SME2 
Outer Product and Accumulate +class sme2_mopx_tile_base s, ZPRRegOp vector_ty, string mnemonic> + : I<(outs TileOp32:$ZAda), + (ins TileOp32:$_ZAda, PPR3bAny:$Pn, PPR3bAny:$Pm, vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$ZAda, $Pn/m, $Pm/m, $Zn, $Zm", + "", []>, Sched<[]> { + bits<2> ZAda; + bits<5> Zn; + bits<3> Pn; + bits<3> Pm; + bits<5> Zm; + let Inst{31-30} = 0b10; + let Inst{29} = s{2}; + let Inst{28-25} = 0b0000; + let Inst{24} = s{1}; + let Inst{23-21} = 0b100; + let Inst{20-16} = Zm; + let Inst{15-13} = Pm; + let Inst{12-10} = Pn; + let Inst{9-5} = Zn; + let Inst{4} = s{0}; + let Inst{3-2} = 0b10; + let Inst{1-0} = ZAda; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_bmopx_tile_base s>{ + def NAME: sme2_mopx_tile_base; +} + +multiclass sme2_int_mopx_tile_base s>{ + def NAME: sme2_mopx_tile_base; +} + + +//===----------------------------------------------------------------------===// +// SME2 multi-vec indexed long long MLA one source 32-bit +class sme2_mla_ll_array_index_32b op> + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR8:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), + mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<4> i; + bits<5> Zn; + bits<2> imm2; + let Inst{31-20} = 0b110000010000; + let Inst{19-16} = Zm; + let Inst{15} = i{3}; + let Inst{14-13} = Rv; + let Inst{12-10} = i{2-0}; + let Inst{9-5} = Zn; + let Inst{4-2} = op; + let Inst{1-0} = imm2; + + let Constraints = "$ZAda = $_ZAda"; +} + +// SME2 multi-vec indexed long long MLA one source 64-bit + +class sme2_mla_ll_array_index_64b op> + : I<(outs MatrixOp64:$ZAda), + (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm2, ZPR16:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), + mnemonic, "\t$ZAda[$Rv, $imm2], $Zn, $Zm$i", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<4> i; + bits<5> Zn; + bits<2> imm2; + let Inst{31-20} = 0b110000011000; + let Inst{19-16} = 
Zm; + let Inst{15} = i{2}; + let Inst{14-13} = Rv; + let Inst{12} = 0b0; + let Inst{11-10} = i{1-0}; + let Inst{9-5} = Zn; + let Inst{4-3} = op; + let Inst{2} = 0b0; + let Inst{1-0} = imm2; + + let Constraints = "$ZAda = $_ZAda"; +} + +class sme2_mla_ll_array_vg24_index_32b op, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs MatrixOp32:$ZAda), + (ins MatrixOp32:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, + vector_ty:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), + mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm$i", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<4> i; + bits<1> imm; + let Inst{31-20} = 0b110000010001; + let Inst{19-16} = Zm; + let Inst{15} = vg4; + let Inst{14-13} = Rv; + let Inst{12} = 0b0; + let Inst{11-10} = i{3-2}; + let Inst{5-3} = op; + let Inst{2-1} = i{1-0}; + let Inst{0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} + +//SME2 multi-vec indexed long long MLA two sources 32-bit + +multiclass sme2_mla_ll_array_vg2_index_32b op> { + def NAME: sme2_mla_ll_array_vg24_index_32b<0b0, op, ZZ_b_mul_r, mnemonic> { + bits<4> Zn; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>; +} + +// SME2 multi-vec indexed long long MLA four sources 32-bit + +multiclass sme2_mla_ll_array_vg4_index_32b op> { + def NAME: sme2_mla_ll_array_vg24_index_32b<0b1, op, ZZZZ_b_mul_r, mnemonic> { + bits<3> Zn; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZPR4b8:$Zm, VectorIndexB:$i), 0>; +} +class sme2_mla_ll_array_vg24_index_64b op, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs MatrixOp64:$ZAda), + (ins MatrixOp64:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, + vector_ty:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), + mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, 
$Zm$i", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<4> i; + bits<1> imm; + let Inst{31-20} = 0b110000011001; + let Inst{19-16} = Zm; + let Inst{15} = vg4; + let Inst{14-13} = Rv; + let Inst{12-11} = 0b00; + let Inst{10} = i{2}; + let Inst{5} = 0b0; + let Inst{4-3} = op; + let Inst{2-1} = i{1-0}; + let Inst{0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} + +// SME2 multi-vec indexed long long MLA two sources 64-bit + +multiclass sme2_mla_ll_array_vg2_index_64b op> { + def NAME: sme2_mla_ll_array_vg24_index_64b<0b0, op, ZZ_h_mul_r, mnemonic>{ + bits<4> Zn; + let Inst{9-6} = Zn; + } + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; +} + +// SME2 multi-vec indexed long long MLA four sources 64-bit + +multiclass sme2_mla_ll_array_vg4_index_64b op> { + def NAME: sme2_mla_ll_array_vg24_index_64b<0b1, op, ZZZZ_h_mul_r, mnemonic>{ + bits<3> Zn; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZPR4b16:$Zm, VectorIndexH:$i), 0>; +} + + +//SME2 multiple and single vector long long FMA one source + +class sme2_mla_ll_array_single_base op, + MatrixOperand matrix_ty, + ZPRRegOp vector_ty, + ZPRRegOp zpr_ty, + string mnemonic> + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm2s4range:$imm, + vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<5> Zn; + bits<2> imm; + let Inst{31-23} = 0b110000010; + let Inst{22} = sz; + let Inst{21-20} = 0b10; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b001; + let Inst{9-5} = Zn; + let Inst{4-2} = op; + let Inst{1-0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_mla_ll_array_single_32b op> { + def NAME : sme2_mla_ll_array_single_base<0b0, op, 
MatrixOp32, ZPR8, + ZPR4b8, mnemonic>; +} +multiclass sme2_mla_ll_array_single_64b op> { + def NAME : sme2_mla_ll_array_single_base<0b1, op, MatrixOp64, ZPR16, + ZPR4b16, mnemonic>; +} +class sme2_mla_ll_array_vg24_single_base op, + MatrixOperand matrix_ty, + RegisterOperand vector_ty, + ZPRRegOp zpr_ty, + string mnemonic> + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, + vector_ty:$Zn, zpr_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm, " # !if(vg4, "vgx4", "vgx2") # "], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<5> Zn; + bits<1> imm; + let Inst{31-23} = 0b110000010; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20} = vg4; + let Inst{19-16} = Zm; + let Inst{15} = 0b0; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b000; + let Inst{9-5} = Zn; + let Inst{4-2} = op; + let Inst{1} = 0b0; + let Inst{0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} + +//SME2 single-multi long long MLA two sources + +multiclass sme2_mla_ll_array_vg2_single_32b op> { + def NAME : sme2_mla_ll_array_vg24_single_base<0b0, 0b0, op, MatrixOp32, ZZ_b, + ZPR4b8, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} + +multiclass sme2_mla_ll_array_vg2_single_64b op> { + def NAME : sme2_mla_ll_array_vg24_single_base<0b1, 0b0, op, MatrixOp64, ZZ_h, + ZPR4b16, mnemonic>; + + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +// SME2 single-multi long long MLA four sources + +multiclass sme2_mla_ll_array_vg4_single_32b op> { + def NAME : sme2_mla_ll_array_vg24_single_base<0b0, 0b1, op, MatrixOp32, ZZZZ_b, + ZPR4b8, mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b:$Zn, ZPR4b8:$Zm), 0>; +} + +multiclass sme2_mla_ll_array_vg4_single_64b op> { + def NAME : sme2_mla_ll_array_vg24_single_base<0b1, 0b1, op, MatrixOp64, 
ZZZZ_h, + ZPR4b16, mnemonic>; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h:$Zn, ZPR4b16:$Zm), 0>; +} + +class sme2_mla_ll_array_vg2_multi_base op, + MatrixOperand matrix_ty, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, + vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm, vgx2], $Zn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<2> Rv; + bits<4> Zn; + bits<1> imm; + let Inst{31-23} = 0b110000011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-17} = Zm; + let Inst{16-15} = 0b00; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b000; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-2} = op; + let Inst{1} = 0b0; + let Inst{0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} + +multiclass sme2_mla_ll_array_vg2_multi_32b op> { + def NAME : sme2_mla_ll_array_vg2_multi_base<0b0, op, MatrixOp32, ZZ_b_mul_r, + mnemonic>; + + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_b_mul_r:$Zn, ZZ_b_mul_r:$Zm), 0>; +} + +multiclass sme2_mla_ll_array_vg2_multi_64b op> { + def NAME : sme2_mla_ll_array_vg2_multi_base<0b1, op, MatrixOp64, ZZ_h_mul_r, + mnemonic>; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZ_h_mul_r:$Zn, ZZ_h_mul_r:$Zm), 0>; +} + +class sme2_mla_ll_array_vg4_multi_base op, + MatrixOperand matrix_ty, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs matrix_ty:$ZAda), + (ins matrix_ty:$_ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, + vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$ZAda[$Rv, $imm, vgx4], $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> Zm; + bits<2> Rv; + bits<3> Zn; + bits<1> imm; + let Inst{31-23} = 0b110000011; + let Inst{22} = sz; + let Inst{21} = 0b1; + let Inst{20-18} = Zm; + let Inst{17-15} = 0b010; + let Inst{14-13} = Rv; + let Inst{12-10} = 0b000; + let Inst{9-7} = 
Zn; + let Inst{6-5} = 0b00; + let Inst{4-2} = op; + let Inst{1} = 0b0; + let Inst{0} = imm; + + let Constraints = "$ZAda = $_ZAda"; +} +multiclass sme2_mla_ll_array_vg4_multi_32b op> { + def NAME : sme2_mla_ll_array_vg4_multi_base<0b0, op, MatrixOp32, ZZZZ_b_mul_r, + mnemonic>; + def : InstAlias(NAME) MatrixOp32:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_b_mul_r:$Zn, ZZZZ_b_mul_r:$Zm), 0>; +} + +multiclass sme2_mla_ll_array_vg4_multi_64b op> { + def NAME : sme2_mla_ll_array_vg4_multi_base<0b1, op, MatrixOp64, ZZZZ_h_mul_r, + mnemonic>; + + def : InstAlias(NAME) MatrixOp64:$ZAda, MatrixIndexGPR32Op8_11:$Rv, uimm1s4range:$imm, ZZZZ_h_mul_r:$Zn, ZZZZ_h_mul_r:$Zm), 0>; +} + +//===----------------------------------------------------------------------===/// +// ZERO instructions. +class sme2_zero_zt + : I<(outs ZT0R:$ZT0), (ins ), + mnemonic, "\t\\{ $ZT0 \\}", + "", []>, Sched<[]> { + let Inst{31-0} = 0b11000000010010000000000000000001; +} + +//===----------------------------------------------------------------------===// +// SME2 lookup table load/store +class sme2_spill_fill_vector + : I, Sched<[]> { + bits<5> Rn; + let Inst{31-22} = 0b1110000100; + let Inst{21} = isStore; + let Inst{20-10} = 0b11111100000; + let Inst{9-5} = Rn; + let Inst{4-0} = 0b00000; + + let mayLoad = !not(isStore); + let mayStore = isStore; +} + +//===----------------------------------------------------------------------===/// +// SME2 move to/from lookup table +class sme2_movt_zt_to_scalar + : I<(outs GPR64:$Rt), (ins ZT0R:$ZTt, uimm3s8:$imm3), + mnemonic, "\t$Rt, $ZTt[$imm3]", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011000; + let Inst{14-12} = imm3; + let Inst{11-5} = 0b0011111; + let Inst{4-0} = Rt; +} + +class sme2_movt_scalar_to_zt + : I<(outs ZT0R:$ZTt), (ins uimm3s8:$imm3, GPR64:$Rt), + mnemonic, "\t$ZTt[$imm3], $Rt", + "", []>, Sched<[]> { + bits<3> imm3; + bits<5> Rt; + let Inst{31-15} = 0b11000000010011100; + let Inst{14-12} 
= imm3; + let Inst{11-5} = 0b0011111; + let Inst{4-0} = Rt; +} + +//===----------------------------------------------------------------------===// +// SME2 Lookup Table Expand +class sme2_luti_vector_index sz, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZT0R:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<5> Zd; + let Inst{31-19} = 0b1100000011001; + let Inst{13-12} = sz; + let Inst{11-10} = 0b00; + let Inst{9-5} = Zn; + let Inst{4-0} = Zd; +} + +class sme2_luti2_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<4> i; + let Inst{18} = 0b1; + let Inst{17-14} = i; +} + +multiclass sme2_luti2_vector_index { + def _B : sme2_luti2_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti2_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti2_vector_index<0b10, ZPR32, mnemonic>; +} + +class sme2_luti4_vector_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_index { + bits<3> i; + let Inst{18-17} = 0b01; + let Inst{16-14} = i; +} + +multiclass sme2_luti4_vector_index { + def _B : sme2_luti4_vector_index<0b00, ZPR8, mnemonic>; + def _H : sme2_luti4_vector_index<0b01, ZPR16, mnemonic>; + def _S : sme2_luti4_vector_index<0b10, ZPR32, mnemonic>; +} +class sme2_luti_vector_vg2_index sz, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZT0R:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<4> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{14} = 0b1; + let Inst{13-12} = sz; + let Inst{11-10} = 0b00; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +class sme2_luti2_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg2_index { + bits<3> i; + let Inst{18} = 0b1; + let Inst{17-15} = i; +} + 
+multiclass sme2_luti2_vector_vg2_index { + def _B : sme2_luti2_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg2_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg2_index { + bits<2> i; + let Inst{18-17} = 0b01; + let Inst{16-15} = i; +} + +multiclass sme2_luti4_vector_vg2_index { + def _B : sme2_luti4_vector_vg2_index<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_luti4_vector_vg2_index<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg2_index<0b10, ZZ_s_mul_r, mnemonic>; +} + +class sme2_luti_vector_vg4_index sz, RegisterOperand vector_ty, + AsmVectorIndexOpnd index_ty, string mnemonic> + : I<(outs vector_ty:$Zd), + (ins ZT0R:$ZTt, ZPRAny:$Zn, index_ty:$i), + mnemonic, "\t$Zd, $ZTt, $Zn$i", + "", []>, Sched<[]> { + bits<5> Zn; + bits<3> Zd; + let Inst{31-19} = 0b1100000010001; + let Inst{15-14} = 0b10; + let Inst{13-12} = sz; + let Inst{11-10} = 0b00; + let Inst{9-5} = Zn; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +class sme2_luti2_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<2> i; + let Inst{18} = 0b1; + let Inst{17-16} = i; +} + +multiclass sme2_luti2_vector_vg4_index { + def _B : sme2_luti2_vector_vg4_index<0b00, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_luti2_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti2_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} + +class sme2_luti4_vector_vg4_index sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_luti_vector_vg4_index { + bits<1> i; + let Inst{18-17} = 0b01; + let Inst{16} = i; +} + +multiclass sme2_luti4_vector_vg4_index { + def _H : sme2_luti4_vector_vg4_index<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_luti4_vector_vg4_index<0b10, ZZZZ_s_mul_r, mnemonic>; +} + 
+//===----------------------------------------------------------------------===// +// SME2 multi-vec unpack two registers +//===----------------------------------------------------------------------===// + +class sme2_unpk_vector_vg24 sz, bit u, RegisterOperand first_vector_ty, + RegisterOperand second_vector_ty, string mnemonic> + : I<(outs first_vector_ty:$Zd), (ins second_vector_ty:$Zn), + mnemonic, "\t$Zd, $Zn", + "", []>, Sched<[]> { + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{19-10} = 0b0101111000; + let Inst{0} = u; +} + +class sme2_unpk_vector_vg2 sz, bit u, RegisterOperand first_vector_ty, + RegisterOperand second_vector_ty, string mnemonic> + : sme2_unpk_vector_vg24 { + bits<5> Zn; + bits<4> Zd; + let Inst{20} = 0b0; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; +} + +multiclass sme2_unpk_vector_vg2 { + def _H : sme2_unpk_vector_vg2<0b01, u, ZZ_h_mul_r, ZPR8, mnemonic>; + def _S : sme2_unpk_vector_vg2<0b10, u, ZZ_s_mul_r, ZPR16, mnemonic>; + def _D : sme2_unpk_vector_vg2<0b11, u, ZZ_d_mul_r, ZPR32, mnemonic>; +} + +class sme2_unpk_vector_vg4 sz, bit u, RegisterOperand first_vector_ty, + RegisterOperand second_vector_ty, string mnemonic> + : sme2_unpk_vector_vg24 { + bits<4> Zn; + bits<3> Zd; + let Inst{20} = 0b1; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; +} + +multiclass sme2_unpk_vector_vg4 { + def _H : sme2_unpk_vector_vg4<0b01, u, ZZZZ_h_mul_r, ZZ_b_mul_r, mnemonic>; + def _S : sme2_unpk_vector_vg4<0b10, u, ZZZZ_s_mul_r, ZZ_h_mul_r, mnemonic>; + def _D : sme2_unpk_vector_vg4<0b11, u, ZZZZ_d_mul_r, ZZ_s_mul_r, mnemonic>; +} + +//===----------------------------------------------------------------------===// +// SME2 multi-vec ZIP four registers +//===----------------------------------------------------------------------===// + +class sme2_zip_vector_vg4 sz, bit q, bit op, + RegisterOperand first_vector_ty, + RegisterOperand second_vector_ty, string mnemonic> + : 
sme2_unpk_vector_vg24 { + bits<3> Zn; + bits<3> Zd; + let Inst{20} = 0b1; + let Inst{17} = 0b1; + let Inst{16} = q; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; + let Inst{4-2} = Zd; + let Inst{1} = op; +} + +multiclass sme2_zip_vector_vg4 { + def _B : sme2_zip_vector_vg4<0b00, 0b0, op, ZZZZ_b_mul_r, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_zip_vector_vg4<0b01, 0b0, op, ZZZZ_h_mul_r, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_zip_vector_vg4<0b10, 0b0, op, ZZZZ_s_mul_r, ZZZZ_s_mul_r, mnemonic>; + def _D : sme2_zip_vector_vg4<0b11, 0b0, op, ZZZZ_d_mul_r, ZZZZ_d_mul_r, mnemonic>; + def _Q : sme2_zip_vector_vg4<0b00, 0b1, op, ZZZZ_q_mul_r, ZZZZ_q_mul_r, mnemonic>; +} + +class sme2_zip_vector_vg2 sz, bit q, bit op, + RegisterOperand multi_vector_ty, ZPRRegOp vector_ty, + string mnemonic> + : I<(outs multi_vector_ty:$Zd), (ins vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$Zd, $Zn, $Zm", + "", []>, Sched<[]> { + bits<5> Zm; + bits<5> Zn; + bits<4> Zd; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-16} = Zm; + let Inst{15-11} = 0b11010; + let Inst{10} = q; + let Inst{9-5} = Zn; + let Inst{4-1} = Zd; + let Inst{0} = op; +} + +multiclass sme2_zip_vector_vg2 { + def _B : sme2_zip_vector_vg2<0b00, 0b0, op, ZZ_b_mul_r, ZPR8, mnemonic>; + def _H : sme2_zip_vector_vg2<0b01, 0b0, op, ZZ_h_mul_r, ZPR16, mnemonic>; + def _S : sme2_zip_vector_vg2<0b10, 0b0, op, ZZ_s_mul_r, ZPR32, mnemonic>; + def _D : sme2_zip_vector_vg2<0b11, 0b0, op, ZZ_d_mul_r, ZPR64, mnemonic>; + def _Q : sme2_zip_vector_vg2<0b00, 0b1, op, ZZ_q_mul_r, ZPR128, mnemonic>; +} + +//===----------------------------------------------------------------------===// +// SME2 MOV + +class sme2_mova_vec_to_tile_vg2_multi_base sz, bit v, + RegisterOperand tile_ty, + Operand index_ty, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs tile_ty:$ZAd), + (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, vector_ty:$Zn), + mnemonic, "\t$ZAd[$Rs, $imm, vgx2], $Zn", + 
"", []>, Sched<[]> { + bits<2> Rs; + bits<4> Zn; + let Inst{31-24} = 0b11000000; + let Inst{23-22} = sz; + let Inst{21-16} = 0b000100; + let Inst{15} = v; + let Inst{14-13} = Rs; + let Inst{12-10} = 0b000; + let Inst{9-6} = Zn; + let Inst{5-3} = 0b000; + + let Constraints = "$ZAd = $_ZAd"; +} + +multiclass sme2_mova_vec_to_tile_or_array_aliases { + def : InstAlias; + +} + +multiclass sme2_mova_vec_to_tile_vg2_multi_base { + + def _B : sme2_mova_vec_to_tile_vg2_multi_base<0b00, v, + !if(v, TileVectorOpV8, + TileVectorOpH8), + uimm3s2range, ZZ_b_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<3> imm; + let Inst{2-0} = imm; + } + + def _H : sme2_mova_vec_to_tile_vg2_multi_base<0b01, v, + !if(v, TileVectorOpV16, + TileVectorOpH16), + uimm2s2range, ZZ_h_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<1> ZAd; + bits<2> imm; + let Inst{2} = ZAd; + let Inst{1-0} = imm; + } + + def _S : sme2_mova_vec_to_tile_vg2_multi_base<0b10, v, + !if(v, TileVectorOpV32, + TileVectorOpH32), + uimm1s2range, ZZ_s_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<2> ZAd; + bits<1> imm; + let Inst{2-1} = ZAd; + let Inst{0} = imm; + } + + def _D : sme2_mova_vec_to_tile_vg2_multi_base<0b11, v, + !if(v, TileVectorOpV64, + TileVectorOpH64), + uimm0s2range, ZZ_d_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<3> ZAd; + let Inst{2-0} = ZAd; + } + + def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo; + + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + def : SME2_Tile_VG2_Multi_Pat; + + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _B), + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm3s2range, ZZ_b_mul_r, + "mov">; + defm : 
sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _H), + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm2s2range, ZZ_h_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _S), + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm1s2range, ZZ_s_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _D), + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s2range, ZZ_d_mul_r, + "mov">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm3s2range, ZZ_b_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm2s2range, ZZ_h_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm1s2range, ZZ_s_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s2range, ZZ_d_mul_r, + "mova">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm3s2range, ZZ_b_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm2s2range, ZZ_h_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm1s2range, ZZ_s_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s2range, ZZ_d_mul_r, + "mova">; +} + +multiclass 
sme2_mova_vec_to_tile_vg2_multi{ + defm _H : sme2_mova_vec_to_tile_vg2_multi_base<0b0, mnemonic, int_h>; + defm _V : sme2_mova_vec_to_tile_vg2_multi_base<0b1, mnemonic, int_v>; +} + +class sme2_mova_vec_to_tile_vg4_multi_base sz, bit v, + RegisterOperand tile_ty, + Operand index_ty, + RegisterOperand vector_ty, + string mnemonic> + : I<(outs tile_ty:$ZAd), + (ins tile_ty:$_ZAd, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm, + vector_ty:$Zn), + mnemonic, + "\t$ZAd[$Rs, $imm, vgx4], $Zn", + "", []>, Sched<[]> { + bits<2> Rs; + bits<3> Zn; + let Inst{31-24} = 0b11000000; + let Inst{23-22} = sz; + let Inst{21-16} = 0b000100; + let Inst{15} = v; + let Inst{14-13} = Rs; + let Inst{12-10} = 0b001; + let Inst{9-7} = Zn; + let Inst{6-3} = 0b0000; + + let Constraints = "$ZAd = $_ZAd"; +} + +multiclass sme2_mova_vec_to_tile_vg4_multi_base { + + def _B : sme2_mova_vec_to_tile_vg4_multi_base<0b00, v, + !if(v, TileVectorOpV8, + TileVectorOpH8), + uimm2s4range, ZZZZ_b_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<2> imm; + let Inst{2} = 0b0; + let Inst{1-0} = imm; + } + + def _H : sme2_mova_vec_to_tile_vg4_multi_base<0b01, v, + !if(v, TileVectorOpV16, + TileVectorOpH16), + uimm1s4range, ZZZZ_h_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<1> ZAd; + bits<1> imm; + let Inst{2} = 0b0; + let Inst{1} = ZAd; + let Inst{0} = imm; + } + + def _S : sme2_mova_vec_to_tile_vg4_multi_base<0b10, v, + !if(v, TileVectorOpV32, + TileVectorOpH32), + uimm0s4range, ZZZZ_s_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<2> ZAd; + let Inst{2} = 0b0; + let Inst{1-0} = ZAd; + } + + def _D : sme2_mova_vec_to_tile_vg4_multi_base<0b11, v, + !if(v, TileVectorOpV64, + TileVectorOpH64), + uimm0s4range, ZZZZ_d_mul_r, + mnemonic>, SMEPseudo2Instr { + bits<3> ZAd; + let Inst{2-0} = ZAd; + } + + def NAME # _B_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _H_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _S_PSEUDO : sme2_move_to_tile_pseudo; + def NAME # _D_PSEUDO : sme2_move_to_tile_pseudo; + + def : 
SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + def : SME2_Tile_VG4_Multi_Pat; + + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _B), + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm2s4range, ZZZZ_b_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _H), + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm1s4range, ZZZZ_h_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _S), + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm0s4range, ZZZZ_s_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME # _D), + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s4range, ZZZZ_d_mul_r, + "mov">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _B), + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm2s4range, ZZZZ_b_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _H), + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm1s4range, ZZZZ_h_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _S), + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm0s4range, ZZZZ_s_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME # _D), + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s4range, ZZZZ_d_mul_r, + "mova">; + +} + +multiclass sme2_mova_vec_to_tile_vg4_multi{ + defm _H : sme2_mova_vec_to_tile_vg4_multi_base<0b0, mnemonic, int_h>; + defm _V : sme2_mova_vec_to_tile_vg4_multi_base<0b1, mnemonic, int_v>; +} + +class sme2_mova_vec_to_array_vg24_multi + : I<(outs array_ty:$ZAd), + (ins 
array_ty:$_ZAd, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm, + vector_ty:$Zn), + mnemonic, "\t$ZAd[$Rs, $imm, " # vg_acronym # "], $Zn", + "", []>, Sched<[]> { + bits<2> Rs; + bits<3> imm; + let Inst{31-15} = 0b11000000000001000; + let Inst{14-13} = Rs; + let Inst{12-11} = 0b01; + let Inst{5-3} = 0b000; + let Inst{2-0} = imm; + + let Constraints = "$ZAd = $_ZAd"; +} + +multiclass sme2_mova_vec_to_array_vg2_multi { + def NAME : sme2_mova_vec_to_array_vg24_multi, SMEPseudo2Instr { + bits<4> Zn; + let Inst{10} = 0b0; + let Inst{9-6} = Zn; + } + + def NAME # _PSEUDO : sme2_move_to_za_pseudo; + + def : SME2_ZA_VG1x2_Multi_Pat; + def : SME2_ZA_VG1x2_Multi_Pat; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_b_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_h_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_s_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_d_mul_r, + "mova">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_b_mul_r, + "mova", "vgx2">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_h_mul_r, + "mova", "vgx2">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_s_mul_r, + "mova", "vgx2">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_b_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_h_mul_r, + "mov">; + 
defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_s_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_d_mul_r, + "mov">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_b_mul_r, + "mov", "vgx2">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_h_mul_r, + "mov", "vgx2">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_s_mul_r, + "mov", "vgx2">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZ_d_mul_r, + "mov", "vgx2">; +} + +multiclass sme2_mova_vec_to_array_vg4_multi { + def NAME : sme2_mova_vec_to_array_vg24_multi, SMEPseudo2Instr { + bits<3> Zn; + let Inst{10} = 0b1; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + } + + def NAME # _PSEUDO : sme2_move_to_za_pseudo; + + def : SME2_ZA_VG1x4_Multi_Pat; + def : SME2_ZA_VG1x4_Multi_Pat; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_b_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_h_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_s_mul_r, + "mova">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_d_mul_r, + "mova">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_b_mul_r, + "mova", "vgx4">; + defm : 
sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_h_mul_r, + "mova", "vgx4">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_s_mul_r, + "mova", "vgx4">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_b_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_h_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_s_mul_r, + "mov">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_d_mul_r, + "mov">; + + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_b_mul_r, + "mov", "vgx4">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_h_mul_r, + "mov", "vgx4">; + defm : sme2_mova_vec_to_tile_or_array_aliases<0, !cast(NAME), + MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_s_mul_r, + "mov", "vgx4">; + defm : sme2_mova_vec_to_tile_or_array_aliases<1, !cast(NAME), + MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, ZZZZ_d_mul_r, + "mov", "vgx4">; + +} + +class sme2_mova_tile_to_vec_vg2_multi_base sz, bit v, + RegisterOperand vector_ty, + RegisterOperand tile_ty, + Operand index_ty, + string mnemonic> + : I<(outs vector_ty:$Zd), + (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm), + mnemonic, + "\t$Zd, $ZAn[$Rs, $imm, vgx2]", + "", []>, Sched<[]> { + bits<4> Zd; + bits<2> Rs; + let Inst{31-24} = 0b11000000; + let Inst{23-22} = sz; + let Inst{21-16} = 0b000110; + let Inst{15} = v; + let Inst{14-13} = Rs; + let Inst{12-8} = 
0b00000; + let Inst{4-1} = Zd; + let Inst{0} = 0b0; +} + +multiclass sme2_mova_tile_or_array_to_vec_aliases { +def : InstAlias; + +} + +multiclass sme2_mova_tile_to_vec_vg2_multi_inst { + + def _B : sme2_mova_tile_to_vec_vg2_multi_base<0b00, v, ZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + uimm3s2range, mnemonic> { + bits<3> imm; + let Inst{7-5} = imm; + } + + def _H : sme2_mova_tile_to_vec_vg2_multi_base<0b01, v, ZZ_h_mul_r, + !if(v, TileVectorOpV16, + TileVectorOpH16), + uimm2s2range, mnemonic> { + bits<1> ZAn; + bits<2> imm; + let Inst{7} = ZAn; + let Inst{6-5} = imm; + } + + def _S : sme2_mova_tile_to_vec_vg2_multi_base<0b10, v, ZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + uimm1s2range, mnemonic> { + bits<2> ZAn; + bits<1> imm; + let Inst{7-6} = ZAn; + let Inst{5} = imm; + } + + def _D : sme2_mova_tile_to_vec_vg2_multi_base<0b11, v, ZZ_d_mul_r, + !if(v, TileVectorOpV64, + TileVectorOpH64), + uimm0s2range, mnemonic> { + bits<3> ZAn; + let Inst{7-5} = ZAn; + } + + defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast(NAME # _B), + ZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm3s2range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1,!cast(NAME # _H), + ZZ_h_mul_r, + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm2s2range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _S), + ZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm1s2range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _D), + ZZ_d_mul_r, + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s2range, "mov">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast(NAME # _B), + ZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm3s2range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0,!cast(NAME # _H), + ZZ_h_mul_r, + !if(v, 
TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm2s2range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _S), + ZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm1s2range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _D), + ZZ_d_mul_r, + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s2range, "mova">; + +} + +multiclass sme2_mova_tile_to_vec_vg2_multi{ + defm _H : sme2_mova_tile_to_vec_vg2_multi_inst<0b0, mnemonic>; + defm _V : sme2_mova_tile_to_vec_vg2_multi_inst<0b1, mnemonic>; +} + + +class sme2_mova_tile_to_vec_vg4_multi_base sz, bit v, + RegisterOperand vector_ty, + RegisterOperand tile_ty, + Operand index_ty, + string mnemonic> + : I<(outs vector_ty:$Zd), + (ins tile_ty:$ZAn, MatrixIndexGPR32Op12_15:$Rs, index_ty:$imm), + mnemonic, + "\t$Zd, $ZAn[$Rs, $imm, vgx4]", + "", []>, Sched<[]> { + bits<3> Zd; + bits<2> Rs; + let Inst{31-24} = 0b11000000; + let Inst{23-22} = sz; + let Inst{21-16} = 0b000110; + let Inst{15} = v; + let Inst{14-13} = Rs; + let Inst{12-8} = 0b00100; + let Inst{4-2} = Zd; + let Inst{1-0} = 0b00; +} + +multiclass sme2_mova_tile_to_vec_vg4_multi_base { + + def _B : sme2_mova_tile_to_vec_vg4_multi_base<0b00, v, + ZZZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + uimm2s4range, mnemonic> { + bits<2> imm; + let Inst{7} = 0b0; + let Inst{6-5} = imm; + } + + def _H : sme2_mova_tile_to_vec_vg4_multi_base<0b01, v, + ZZZZ_h_mul_r, + !if(v, TileVectorOpV16, + TileVectorOpH16), + uimm1s4range, mnemonic> { + bits<1> ZAn; + bits<1> imm; + let Inst{7} = 0b0; + let Inst{6} = ZAn; + let Inst{5} = imm; + } + + def _S : sme2_mova_tile_to_vec_vg4_multi_base<0b10, v, + ZZZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + uimm0s4range, mnemonic> { + bits<2> ZAn; + let Inst{7} = 0b0; + let Inst{6-5} = ZAn; + } + + def _D : sme2_mova_tile_to_vec_vg4_multi_base<0b11, v, + ZZZZ_d_mul_r, + !if(v, 
TileVectorOpV64, + TileVectorOpH64), + uimm0s4range, mnemonic> { + bits<3> ZAn; + let Inst{7-5} = ZAn; + } + + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _B), + ZZZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm2s4range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _H), + ZZZZ_h_mul_r, + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm1s4range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _S), + ZZZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm0s4range, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME # _D), + ZZZZ_d_mul_r, + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s4range, "mov">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _B), + ZZZZ_b_mul_r, + !if(v, TileVectorOpV8, + TileVectorOpH8), + MatrixIndexGPR32Op12_15, + uimm2s4range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _H), + ZZZZ_h_mul_r, + !if(v, TileVectorOpV16, + TileVectorOpH16), + MatrixIndexGPR32Op12_15, + uimm1s4range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _S), + ZZZZ_s_mul_r, + !if(v, TileVectorOpV32, + TileVectorOpH32), + MatrixIndexGPR32Op12_15, + uimm0s4range, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME # _D), + ZZZZ_d_mul_r, + !if(v, TileVectorOpV64, + TileVectorOpH64), + MatrixIndexGPR32Op12_15, + uimm0s4range, "mova">; + +} +multiclass sme2_mova_tile_to_vec_vg4_multi{ + defm _H : sme2_mova_tile_to_vec_vg4_multi_base<0b0, mnemonic>; + defm _V : sme2_mova_tile_to_vec_vg4_multi_base<0b1, mnemonic>; +} + +class sme2_mova_array_to_vec_vg24_multi + : I<(outs vector_ty:$Zd), + (ins array_ty:$ZAn, MatrixIndexGPR32Op8_11:$Rs, sme_elm_idx0_7:$imm), + mnemonic, + "\t$Zd, $ZAn[$Rs, $imm, " # vg_acronym # "]", + "", []>, Sched<[]> { + bits<2> Rs; + bits<3> 
imm; + let Inst{31-15} = 0b11000000000001100; + let Inst{14-13} = Rs; + let Inst{12-11} = 0b01; + let Inst{9-8} = 0b00; + let Inst{7-5} = imm; + let Inst{0} = 0b0; +} + +multiclass sme2_mova_array_to_vec_vg2_multi { + def NAME : sme2_mova_array_to_vec_vg24_multi{ + bits<4> Zd; + let Inst{10} = 0b0; + let Inst{4-1} = Zd; + } + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx2">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx2">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx2">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + 
ZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx2">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx2">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx2">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME), + ZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx2">; +} + +multiclass sme2_mova_array_to_vec_vg4_multi { + def NAME : sme2_mova_array_to_vec_vg24_multi{ + bits<3> Zd; + let Inst{10} = 0b1; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; + } + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx4">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx4">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mova", "vgx4">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_h_mul_r, MatrixOp16, + 
MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov">; + + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_b_mul_r, MatrixOp8, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx4">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_h_mul_r, MatrixOp16, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx4">; + defm : sme2_mova_tile_or_array_to_vec_aliases<0, !cast(NAME), + ZZZZ_s_mul_r, MatrixOp32, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx4">; + defm : sme2_mova_tile_or_array_to_vec_aliases<1, !cast(NAME), + ZZZZ_d_mul_r, MatrixOp64, + MatrixIndexGPR32Op8_11, + sme_elm_idx0_7, "mov", "vgx4">; +} + +//===----------------------------------------------------------------------===// +// SME2 multi-vec saturating shift right narrow +class sme2_sat_shift_vector_vg2 + : I<(outs ZPR16:$Zd), (ins ZZ_s_mul_r:$Zn, tvecshiftR16:$imm4), + mnemonic, "\t$Zd, $Zn, $imm4", + "", []>, Sched<[]> { + bits<4> imm4; + bits<4> Zn; + bits<5> Zd; + let Inst{31-21} = 0b11000001111; + let Inst{20} = op; + let Inst{19-16} = imm4; + let Inst{15-10} = 0b110101; + let Inst{9-6} = Zn; + let Inst{5} = u; + let Inst{4-0} = Zd; +} + +multiclass sme2_sat_shift_vector_vg2 { + def NAME : sme2_sat_shift_vector_vg2; + + def : SME2_Sat_Shift_VG2_Pat; +} + +class sme2_sat_shift_vector_vg4 op, + ZPRRegOp zpr_ty, RegisterOperand vector_ty, + Operand imm_ty, string mnemonic> + : I<(outs zpr_ty:$Zd), (ins vector_ty:$Zn, imm_ty:$imm), + mnemonic, "\t$Zd, $Zn, $imm", + "", []>, Sched<[]> { + bits<3> Zn; + bits<5> Zd; + let Inst{31-24} = 0b11000001; + let Inst{21} = 0b1; + let Inst{15-11} = 0b11011; + let Inst{10} = n; + let Inst{9-7} = Zn; + let Inst{6-5} = op; + let 
Inst{4-0} = Zd; +} + +multiclass sme2_sat_shift_vector_vg4 op, SDPatternOperator intrinsic> { + def _B : sme2_sat_shift_vector_vg4{ + bits<5> imm; + let Inst{23-22} = 0b01; + let Inst{20-16} = imm; + } + def _H : sme2_sat_shift_vector_vg4 { + bits<6> imm; + let Inst{23} = 0b1; + let Inst{22} = imm{5}; + let Inst{20-16} = imm{4-0}; + } + + def : SME2_Sat_Shift_VG4_Pat; + def : SME2_Sat_Shift_VG4_Pat; +} + +//===----------------------------------------------------------------------===// +// SME2 signed saturating doubling multiply high +class sme2_sqdmulh_vector_vg2_single sz, RegisterOperand vector_ty, + ZPRRegOp zpr_ty, string mnemonic> + : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), + mnemonic, "\t$Zdn, $_Zdn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<4> Zdn; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b10; + let Inst{19-16} = Zm; + let Inst{15-5} = 0b10100100000; + let Inst{4-1} = Zdn; + let Inst{0} = 0b0; + let Constraints = "$Zdn = $_Zdn"; +} + +multiclass sme2_sqdmulh_vector_vg2_single { + def _B : sme2_sqdmulh_vector_vg2_single<0b00, ZZ_b_mul_r, ZPR4b8, mnemonic>; + def _H : sme2_sqdmulh_vector_vg2_single<0b01, ZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sqdmulh_vector_vg2_single<0b10, ZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sqdmulh_vector_vg2_single<0b11, ZZ_d_mul_r, ZPR4b64, mnemonic>; +} +class sme2_sqdmulh_vector_vg4_single sz, RegisterOperand vector_ty, + ZPRRegOp zpr_ty, string mnemonic> + : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, zpr_ty:$Zm), + mnemonic, "\t$Zdn, $_Zdn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<3> Zdn; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21-20} = 0b10; + let Inst{19-16} = Zm; + let Inst{15-5} = 0b10101100000; + let Inst{4-2} = Zdn; + let Inst{1-0} = 0b00; + let Constraints = "$Zdn = $_Zdn"; +} + +multiclass sme2_sqdmulh_vector_vg4_single { + def _B : sme2_sqdmulh_vector_vg4_single<0b00, ZZZZ_b_mul_r, ZPR4b8, mnemonic>; + 
def _H : sme2_sqdmulh_vector_vg4_single<0b01, ZZZZ_h_mul_r, ZPR4b16, mnemonic>; + def _S : sme2_sqdmulh_vector_vg4_single<0b10, ZZZZ_s_mul_r, ZPR4b32, mnemonic>; + def _D : sme2_sqdmulh_vector_vg4_single<0b11, ZZZZ_d_mul_r, ZPR4b64, mnemonic>; +} + +class sme2_sqdmulh_vector_vg2_multi sz, RegisterOperand vector_ty, + string mnemonic> + : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), + mnemonic, "\t$Zdn, $_Zdn, $Zm", + "", []>, Sched<[]> { + bits<4> Zm; + bits<4> Zdn; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-17} = Zm; + let Inst{16-5} = 0b010110100000; + let Inst{4-1} = Zdn; + let Inst{0} = 0b0; + let Constraints = "$Zdn = $_Zdn"; +} +multiclass sme2_sqdmulh_vector_vg2_multi { + def _B : sme2_sqdmulh_vector_vg2_multi<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_sqdmulh_vector_vg2_multi<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_sqdmulh_vector_vg2_multi<0b10, ZZ_s_mul_r, mnemonic>; + def _D : sme2_sqdmulh_vector_vg2_multi<0b11, ZZ_d_mul_r, mnemonic>; +} + +class sme2_sqdmulh_vector_vg4_multi sz, RegisterOperand vector_ty, + string mnemonic> + : I<(outs vector_ty:$Zdn), (ins vector_ty:$_Zdn, vector_ty:$Zm), + mnemonic, "\t$Zdn, $_Zdn, $Zm", + "", []>, Sched<[]> { + bits<3> Zm; + bits<3> Zdn; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{20-18} = Zm; + let Inst{17-5} = 0b0010111100000; + let Inst{4-2} = Zdn; + let Inst{1-0} = 0b00; + let Constraints = "$Zdn = $_Zdn"; +} + +multiclass sme2_sqdmulh_vector_vg4_multi { + def _B : sme2_sqdmulh_vector_vg4_multi<0b00, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_sqdmulh_vector_vg4_multi<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_sqdmulh_vector_vg4_multi<0b10, ZZZZ_s_mul_r, mnemonic>; + def _D : sme2_sqdmulh_vector_vg4_multi<0b11, ZZZZ_d_mul_r, mnemonic>; +} + +//===----------------------------------------------------------------------===// +// SME2 Multi-vector - FRINT + +class sme2_frint_vector_vg24_multi opc, + 
RegisterOperand vector_ty> + : I<(outs vector_ty:$Zd), (ins vector_ty:$Zn), + mnemonic, "\t$Zd, $Zn", "", []>, Sched<[]> { + let Inst{31-21} = 0b11000001101; + let Inst{20} = vg4; + let Inst{19} = 0b1; + let Inst{18-16} = opc; + let Inst{15-10} = 0b111000; + let Inst{0} = 0b0; +} + +multiclass sme2_frint_vector_vg2_multi opc> { + def _S : sme2_frint_vector_vg24_multi { + bits<4> Zn; + bits<4> Zd; + let Inst{9-6} = Zn; + let Inst{5} = 0b0; + let Inst{4-1} = Zd; + } +} + +multiclass sme2_frint_vector_vg4_multi opc> { + def _S : sme2_frint_vector_vg24_multi { + bits<3> Zn; + bits<3> Zd; + let Inst{9-7} = Zn; + let Inst{6-5} = 0b00; + let Inst{4-2} = Zd; + let Inst{1} = 0b0; + } +} + +//===----------------------------------------------------------------------===// +// SME2 Multi-vector - SVE Select +class sme2_sel_vector_vg24 sz, RegisterOperand vector_ty, + string mnemonic> + : I<(outs vector_ty:$Zd), + (ins PNRAny_p8_p15:$PNg, vector_ty:$Zn, vector_ty:$Zm), + mnemonic, "\t$Zd, $PNg, $Zn, $Zm", + "", []>, Sched<[]> { + bits<3> PNg; + let Inst{31-24} = 0b11000001; + let Inst{23-22} = sz; + let Inst{21} = 0b1; + let Inst{15-13} = 0b100; + let Inst{12-10} = PNg; + let Inst{5} = 0b0; + let Inst{0} = 0b0; +} + +class sme2_sel_vector_vg2 sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_sel_vector_vg24 { + bits<4> Zm; + bits<4> Zn; + bits<4> Zd; + let Inst{20-17} = Zm; + let Inst{16} = 0b0; + let Inst{9-6} = Zn; + let Inst{4-1} = Zd; +} + +multiclass sme2_sel_vector_vg2{ + def _B : sme2_sel_vector_vg2<0b00, ZZ_b_mul_r, mnemonic>; + def _H : sme2_sel_vector_vg2<0b01, ZZ_h_mul_r, mnemonic>; + def _S : sme2_sel_vector_vg2<0b10, ZZ_s_mul_r, mnemonic>; + def _D : sme2_sel_vector_vg2<0b11, ZZ_d_mul_r, mnemonic>; +} +class sme2_sel_vector_vg4 sz, RegisterOperand vector_ty, + string mnemonic> + : sme2_sel_vector_vg24 { + bits<3> Zm; + bits<3> Zn; + bits<3> Zd; + let Inst{20-18} = Zm; + let Inst{17-16} = 0b01; + let Inst{9-7} = Zn; + let Inst{6} = 0b0; + let Inst{4-2} = Zd; 
+ let Inst{1} = 0b0; +} +multiclass sme2_sel_vector_vg4 { + def _B : sme2_sel_vector_vg4<0b00, ZZZZ_b_mul_r, mnemonic>; + def _H : sme2_sel_vector_vg4<0b01, ZZZZ_h_mul_r, mnemonic>; + def _S : sme2_sel_vector_vg4<0b10, ZZZZ_s_mul_r, mnemonic>; + def _D : sme2_sel_vector_vg4<0b11, ZZZZ_d_mul_r, mnemonic>; +} + +//===----------------------------------------------------------------------===// +// Non contiguous Load and Store + +class sme2_ld_vector_vg24_multi_scalar_scalar msz, bit n, + RegisterOperand multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : I<(outs multi_vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<5> Rm; + bits<3> PNg; + bits<5> Rn; + let Inst{31-21} = 0b10100001000; + let Inst{20-16} = Rm; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{3} = n; + + let mayLoad = 1; +} +class sme2_ld_vector_vg2_multi_scalar_scalar msz, bit n, + RegisterOperand multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : sme2_ld_vector_vg24_multi_scalar_scalar { + bits<4> Zt; + let Inst{15} = 0b0; + let Inst{4} = Zt{3}; + let Inst{2-0} = Zt{2-0}; +} + +class sme2_ld_vector_vg4_multi_scalar_scalar msz, bit n, + RegisterOperand multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : sme2_ld_vector_vg24_multi_scalar_scalar { + bits<3> Zt; + let Inst{15} = 0b1; + let Inst{4} = Zt{2}; + let Inst{2} = 0b0; + let Inst{1-0} = Zt{1-0}; +} + + +class sme2_ld_vector_vg24_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic> + : I<(outs multi_vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<4> imm4; + bits<3> PNg; + bits<5> Rn; + let Inst{31-20} = 0b101000010100; + let Inst{19-16} = imm4; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let 
Inst{3} = n; + + let mayLoad = 1; +} + +multiclass sme2_ld_vector_vg2_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic>{ + def NAME : sme2_ld_vector_vg24_multi_scalar_immediate { + bits<4> Zt; + let Inst{15} = 0b0; + let Inst{4} = Zt{3}; + let Inst{2-0} = Zt{2-0}; +} + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 0>; + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, 0), 1>; +} + +multiclass sme2_ld_vector_vg4_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic> { + def NAME : sme2_ld_vector_vg24_multi_scalar_immediate { + bits<3> Zt; + let Inst{15} = 0b1; + let Inst{4} = Zt{2}; + let Inst{2} = 0b0; + let Inst{1-0} = Zt{1-0}; +} + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 0>; + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, 0), 1>; +} + +//===----------------------------------------------------------------------===// +// SME2 Non-Contiguous Store +class sme2_st_vector_vg24_multi_scalar_scalar msz, bit n, + RegisterOperand multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : I<(outs ), + (ins multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<5> Rm; + bits<3> PNg; + bits<5> Rn; + let Inst{31-21} = 0b10100001001; + let Inst{20-16} = Rm; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{3} = n; + + let mayStore = 1; +} + +class sme2_st_vector_vg2_multi_scalar_scalar msz, bit n, + RegisterOperand multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : sme2_st_vector_vg24_multi_scalar_scalar { + bits<4> Zt; + let Inst{15} = 0b0; + let Inst{4} = Zt{3}; + let Inst{2-0} = Zt{2-0}; +} + +class sme2_st_vector_vg4_multi_scalar_scalar msz, bit n, + RegisterOperand 
multi_vector_ty, + RegisterOperand gpr_ty, + string mnemonic> + : sme2_st_vector_vg24_multi_scalar_scalar { + bits<3> Zt; + let Inst{15} = 0b1; + let Inst{4} = Zt{2}; + let Inst{2} = 0b0; + let Inst{1-0} = Zt{1-0}; +} + +class sme2_st_vector_vg24_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic> + : I<(outs ), + (ins multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), + mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<4> imm4; + bits<3> PNg; + bits<5> Rn; + let Inst{31-20} = 0b101000010110; + let Inst{19-16} = imm4; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{3} = n; + + let mayStore = 1; +} + + +multiclass sme2_st_vector_vg2_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic> { + def NAME: sme2_st_vector_vg24_multi_scalar_immediate { + bits<4> Zt; + let Inst{15} = 0b0; + let Inst{4} = Zt{3}; + let Inst{2-0} = Zt{2-0}; +} + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 0>; + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn,0), 1>; +} + +multiclass sme2_st_vector_vg4_multi_scalar_immediate msz, bit n, + RegisterOperand multi_vector_ty, + Operand index_ty, + string mnemonic> { + def NAME : sme2_st_vector_vg24_multi_scalar_immediate { + bits<3> Zt; + let Inst{15} = 0b1; + let Inst{4} = Zt{2}; + let Inst{2} = 0b0; + let Inst{1-0} = Zt{1-0}; +} + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, index_ty:$imm4), 0>; + + def : InstAlias(NAME) multi_vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn,0), 1>; +} diff --git a/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp b/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/Target/AArch64/SMEPeepholeOpt.cpp @@ -0,0 +1,163 @@ +//===- SMEPeepholeOpt.cpp - SME peephole optimization pass 
---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// + +#include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" +#include "Utils/AArch64SMEAttributes.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-sme-peephole-opt" + +namespace { + +struct SMEPeepholeOpt : public MachineFunctionPass { + static char ID; + + SMEPeepholeOpt() : MachineFunctionPass(ID) { + initializeSMEPeepholeOptPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + + StringRef getPassName() const override { + return "SME Peephole Optimization pass"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool optimizeStartStopPairs(MachineBasicBlock &MBB) const; +}; + +char SMEPeepholeOpt::ID = 0; + +} // end anonymous namespace + +static bool isConditionalStartStop(const MachineInstr *MI) { + return MI->getOpcode() == AArch64::MSRpstatePseudo; +} + +static bool isMatchingStartStopPair(const MachineInstr *MI1, + const MachineInstr *MI2) { + // We only consider the same type of streaming mode change here, i.e. + // start/stop SM, or start/stop ZA pairs. + if (MI1->getOperand(0).getImm() != MI2->getOperand(0).getImm()) + return false; + + bool IsConditional = isConditionalStartStop(MI2); + if (isConditionalStartStop(MI1) != IsConditional) + return false; + + if (!IsConditional) + return true; + + // Check to make sure the conditional start/stop pairs are identical. 
+ return MI1->getOperand(3).getImm() == MI2->getOperand(3).getImm(); +} + +bool SMEPeepholeOpt::optimizeStartStopPairs(MachineBasicBlock &MBB) const { + SmallVector ToBeRemoved; + MachineInstr *Prev = nullptr; + bool PrevIsStart = false; + + for (MachineInstr &MI : make_early_inc_range(MBB)) { + // Walk through instructions in the block trying to find pairs of smstart + // and smstop nodes that cancel each other out. We only permit a limited + // set of instructions to appear between them, otherwise we reset our + // tracking. + switch (MI.getOpcode()) { + default: + Prev = nullptr; + break; + case AArch64::BL: { + // Permits calls to __arm_sme_state. + if (!MI.getOperand(0).isSymbol() || + strcmp(MI.getOperand(0).getSymbolName(), "__arm_sme_state")) + Prev = nullptr; + break; + } + case AArch64::COPY: { + // Permit copies of 32 and 64-bit registers. + if (!MI.getOperand(1).isReg()) { + Prev = nullptr; + break; + } + Register Reg = MI.getOperand(1).getReg(); + if (!AArch64::GPR32RegClass.contains(Reg) && + !AArch64::GPR64RegClass.contains(Reg)) + Prev = nullptr; + break; + } + case AArch64::ADJCALLSTACKDOWN: + case AArch64::ADJCALLSTACKUP: + case AArch64::ANDXri: + // We permit these as they don't generate SVE/NEON instructions. + break; + case AArch64::MSRpstatesvcrImm1: + case AArch64::MSRpstatePseudo: { + // Pairs of smstart/smstop nodes must either both be unconditional or + // both be conditional. 
+ if (Prev && !isMatchingStartStopPair(Prev, &MI)) { + Prev = nullptr; + break; + } + + assert((MI.getOperand(1).getImm() < 2) && "Invalid SM state"); + bool CurIsStart = (MI.getOperand(1).getImm() != 0); + if (Prev && CurIsStart != PrevIsStart) { + ToBeRemoved.push_back(Prev); + ToBeRemoved.push_back(&MI); + } + + if (Prev) + Prev = nullptr; + else { + Prev = &MI; + PrevIsStart = CurIsStart; + } + break; + } + } + } + + for (MachineInstr *MI : ToBeRemoved) + MI->eraseFromParent(); + + return ToBeRemoved.size(); +} + +INITIALIZE_PASS(SMEPeepholeOpt, "aarch64-sme-peephole-opt", + "SME Peephole Optimization", false, false) + +bool SMEPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction())) + return false; + + if (!MF.getSubtarget().hasSME()) + return false; + + assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!"); + + bool Changed = false; + + // Even if the block lives in a function with no SME attributes attached we + // still have to analyze all the blocks because we may call a streaming + // function that requires smstart/smstop pairs. 
+ for (MachineBasicBlock &MBB : MF) { + Changed |= optimizeStartStopPairs(MBB); + } + + return Changed; +} + +FunctionPass *llvm::createSMEPeepholeOptPass() { return new SMEPeepholeOpt(); } diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -8652,3 +8652,279 @@ def _S : sve2p1_fclamp; def _D : sve2p1_fclamp; } + + +class sve2p1_ptrue_pn sz, PNRP8_15RegOp pnrty, SDPatternOperator op> + : I<(outs pnrty:$PNd), (ins), mnemonic, "\t$PNd", + "", [(set pnrty:$PNd, (op))]>, Sched<[]> { + bits<3> PNd; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz; + let Inst{21-3} = 0b1000000111100000010; + let Inst{2-0} = PNd; +} + + +multiclass sve2p1_ptrue_pn { + def _B : sve2p1_ptrue_pn; + def _H : sve2p1_ptrue_pn; + def _S : sve2p1_ptrue_pn; + def _D : sve2p1_ptrue_pn; +} + + +class sve2p1_int_ctr_to_mask sz, PPRRegOp pprty> + : I<(outs pprty:$Pd), (ins PNRAny_p8_p15:$PNn, VectorIndexS32b:$imm2), + mnemonic, "\t$Pd, $PNn$imm2", + "", []>, Sched<[]> { + bits<4> Pd; + bits<3> PNn; + bits<2> imm2; + let Inst{31-24} = 0b00100101; + let Inst{23-22} = sz; + let Inst{21-10} = 0b100000011100; + let Inst{9-8} = imm2; + let Inst{7-5} = PNn; + let Inst{4} = 0b1; + let Inst{3-0} = Pd; +} + + +multiclass sve2p1_int_ctr_to_mask { + def _B : sve2p1_int_ctr_to_mask; + def _H : sve2p1_int_ctr_to_mask; + def _S : sve2p1_int_ctr_to_mask; + def _D : sve2p1_int_ctr_to_mask; + + def : SVE_2_Op_Imm_Pat(NAME # _B)>; + def : SVE_2_Op_Imm_Pat(NAME # _H)>; + def : SVE_2_Op_Imm_Pat(NAME # _S)>; + def : SVE_2_Op_Imm_Pat(NAME # _D)>; +} + + +// SME2 multi-vec contiguous load (scalar plus scalar, two registers) +class sve2p1_mem_cld_ss_2z msz, bit n, + RegisterOperand vector_ty, RegisterOperand gpr_ty> + : I<(outs vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<4> Zt; 
+ bits<5> Rm; + bits<5> Rn; + bits<3> PNg; + let Inst{31-21} = 0b10100000000; + let Inst{20-16} = Rm; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-1} = Zt; + let Inst{0} = n; + + let mayLoad = 1; +} + +// SME2 multi-vec contiguous load (scalar plus immediate, two registers) +class sve2p1_mem_cld_si_2z msz, bit n, + RegisterOperand vector_ty> + : I<(outs vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, simm4s2:$imm4), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<4> Zt; + bits<5> Rn; + bits<3> PNg; + bits<4> imm4; + let Inst{31-20} = 0b101000000100; + let Inst{19-16} = imm4; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-1} = Zt; + let Inst{0} = n; + + let mayLoad = 1; +} + +multiclass sve2p1_mem_cld_si_2z msz, bit n, + RegisterOperand vector_ty> { + def NAME : sve2p1_mem_cld_si_2z; + + def : InstAlias(NAME) vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, 0), 1>; +} + +// SME2 multi-vec contiguous load (scalar plus scalar, four registers) +class sve2p1_mem_cld_ss_4z msz, bit n, + RegisterOperand vector_ty, RegisterOperand gpr_ty> + : I<(outs vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<3> Zt; + bits<5> Rm; + bits<5> Rn; + bits<3> PNg; + let Inst{31-21} = 0b10100000000; + let Inst{20-16} = Rm; + let Inst{15} = 0b1; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-2} = Zt; + let Inst{1} = 0b0; + let Inst{0} = n; + + let mayLoad = 1; +} + +// SME2 multi-vec contiguous load (scalar plus immediate, four registers) +class sve2p1_mem_cld_si_4z msz, bit n, + RegisterOperand vector_ty> + : I<(outs vector_ty:$Zt), + (ins PNRAny_p8_p15:$PNg, GPR64sp:$Rn, simm4s4:$imm4), + mnemonic, "\t$Zt, $PNg/z, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<3> Zt; + bits<5> Rn; + bits<3> PNg; + 
bits<4> imm4; + let Inst{31-20} = 0b101000000100; + let Inst{19-16} = imm4; + let Inst{15} = 0b1; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-2} = Zt; + let Inst{1} = 0b0; + let Inst{0} = n; + + let mayLoad = 1; +} + +multiclass sve2p1_mem_cld_si_4z msz, bit n, + RegisterOperand vector_ty> { + def NAME : sve2p1_mem_cld_si_4z; + + def : InstAlias(NAME) vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, 0), 1>; +} + + +// SME2 multi-vec contiguous store (scalar plus scalar, two registers) +class sve2p1_mem_cst_ss_2z msz, bit n, + RegisterOperand vector_ty, RegisterOperand gpr_ty> + : I<(outs ), + (ins vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<4> Zt; + bits<5> Rm; + bits<5> Rn; + bits<3> PNg; + let Inst{31-21} = 0b10100000001; + let Inst{20-16} = Rm; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-1} = Zt; + let Inst{0} = n; + + let mayStore = 1; +} + + +// SME2 multi-vec contiguous store (scalar plus immediate, two registers) +class sve2p1_mem_cst_si_2z msz, bit n, + RegisterOperand vector_ty> + : I<(outs ), + (ins vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, simm4s2:$imm4), + mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<4> Zt; + bits<5> Rn; + bits<3> PNg; + bits<4> imm4; + let Inst{31-20} = 0b101000000110; + let Inst{19-16} = imm4; + let Inst{15} = 0b0; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-1} = Zt; + let Inst{0} = n; + + let mayStore = 1; +} + + +multiclass sve2p1_mem_cst_si_2z msz, bit n, + RegisterOperand vector_ty> { + def NAME : sve2p1_mem_cst_si_2z; + + def : InstAlias(NAME) vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, 0), 1>; +} + + +// SME2 multi-vec contiguous store (scalar plus scalar, four registers) +class sve2p1_mem_cst_ss_4z msz, bit n, + RegisterOperand vector_ty, RegisterOperand 
gpr_ty> + : I<(outs ), + (ins vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, gpr_ty:$Rm), + mnemonic, "\t$Zt, $PNg, [$Rn, $Rm]", + "", []>, Sched<[]> { + bits<3> Zt; + bits<5> Rm; + bits<5> Rn; + bits<3> PNg; + let Inst{31-21} = 0b10100000001; + let Inst{20-16} = Rm; + let Inst{15} = 0b1; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-2} = Zt; + let Inst{1} = 0b0; + let Inst{0} = n; + + let mayStore = 1; +} + + +// SME2 multi-vec contiguous store (scalar plus immediate, four registers) +class sve2p1_mem_cst_si_4z msz, bit n, + RegisterOperand vector_ty> + : I<(outs ), + (ins vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn, simm4s4:$imm4), + mnemonic, "\t$Zt, $PNg, [$Rn, $imm4, mul vl]", + "", []>, Sched<[]> { + bits<3> Zt; + bits<5> Rn; + bits<3> PNg; + bits<4> imm4; + let Inst{31-20} = 0b101000000110; + let Inst{19-16} = imm4; + let Inst{15} = 0b1; + let Inst{14-13} = msz; + let Inst{12-10} = PNg; + let Inst{9-5} = Rn; + let Inst{4-2} = Zt; + let Inst{1} = 0b0; + let Inst{0} = n; + + let mayStore = 1; +} + + +multiclass sve2p1_mem_cst_si_4z msz, bit n, + RegisterOperand vector_ty> { + def NAME : sve2p1_mem_cst_si_4z; + + def : InstAlias(NAME) vector_ty:$Zt, PNRAny_p8_p15:$PNg, GPR64sp:$Rn,0), 1>; +} diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1629,6 +1629,10 @@ return &SI; } + // The code below doesn't work on aarch64_svcount. + if (TrueVal->getType()->isAArch64SvcountTy()) + return nullptr; + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring // decomposeBitTestICmp() might help. 
{ diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -4620,7 +4620,7 @@ // Skip alloca forms that this analysis can't handle. auto *AT = AI.getAllocatedType(); if (AI.isArrayAllocation() || !AT->isSized() || isa(AT) || - DL.getTypeAllocSize(AT).getFixedSize() == 0) + AT->isAArch64SvcountTy() || DL.getTypeAllocSize(AT).getFixedSize() == 0) return false; bool Changed = false; diff --git a/llvm/test/Assembler/aarch64_svcount.ll b/llvm/test/Assembler/aarch64_svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Assembler/aarch64_svcount.ll @@ -0,0 +1,17 @@ +; RUN: llvm-as < %s | llvm-dis | FileCheck %s +; RUN: verify-uselistorder %s +; Basic smoke test for aarch64_svcount type. + +define aarch64_svcount @func1(aarch64_svcount %A) { +; CHECK-LABEL: @func1 +; CHECK: ret aarch64_svcount %A + ret aarch64_svcount %A +} + +define aarch64_svcount @func2(aarch64_svcount %A) { +; CHECK-LABEL: @func2 +; CHECK: %1 = call aarch64_svcount @func1(aarch64_svcount %A) +; CHECK-NEXT: ret aarch64_svcount %1 + %1 = call aarch64_svcount @func1(aarch64_svcount %A); + ret aarch64_svcount %1 +} diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll @@ -4,9 +4,9 @@ define void @asm_simple_memory_clobber() { ; CHECK-LABEL: name: asm_simple_memory_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0 - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, !0 - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, !0 + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, !0 + ; CHECK-NEXT: RET_ReallyLR call void asm sideeffect 
"", "~{memory}"(), !srcloc !0 call void asm sideeffect "", ""(), !srcloc !0 ret void @@ -17,8 +17,8 @@ define void @asm_simple_register_clobber() { ; CHECK-LABEL: name: asm_simple_register_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"mov x0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $x0, !0 - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: INLINEASM &"mov x0, 7", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $x0, !0 + ; CHECK-NEXT: RET_ReallyLR call void asm sideeffect "mov x0, 7", "~{x0}"(), !srcloc !0 ret void } @@ -26,7 +26,7 @@ define i64 @asm_register_early_clobber() { ; CHECK-LABEL: name: asm_register_early_clobber ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 1703947 /* regdef-ec:GPR64common */, def early-clobber %0, 1703947 /* regdef-ec:GPR64common */, def early-clobber %1, !0 + ; CHECK: INLINEASM &"mov $0, 7; mov $1, 7", 1 /* sideeffect attdialect */, 1769483 /* regdef-ec:GPR64common */, def early-clobber %0, 1769483 /* regdef-ec:GPR64common */, def early-clobber %1, !0 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK: [[ADD:%[0-9]+]]:_(s64) = G_ADD [[COPY]], [[COPY1]] @@ -42,10 +42,10 @@ define i32 @test_specific_register_output() nounwind ssp { ; CHECK-LABEL: name: test_specific_register_output ; CHECK: bb.1.entry: - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $w0 - ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $w0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: %0 = tail call i32 asm "mov ${0:w}, 7", "={w0}"() nounwind ret i32 %0 @@ -54,7 +54,7 @@ define i32 
@test_single_register_output() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output ; CHECK: bb.1.entry: - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 720906 /* regdef:GPR32common */, def %0 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK: $w0 = COPY [[COPY]](s32) ; CHECK: RET_ReallyLR implicit $w0 @@ -66,7 +66,7 @@ define i64 @test_single_register_output_s64() nounwind ssp { ; CHECK-LABEL: name: test_single_register_output_s64 ; CHECK: bb.1.entry: - ; CHECK: INLINEASM &"mov $0, 7", 0 /* attdialect */, 1703946 /* regdef:GPR64common */, def %0 + ; CHECK: INLINEASM &"mov $0, 7", 0 /* attdialect */, 1769482 /* regdef:GPR64common */, def %0 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY %0 ; CHECK: $x0 = COPY [[COPY]](s64) ; CHECK: RET_ReallyLR implicit $x0 @@ -79,7 +79,7 @@ define float @test_multiple_register_outputs_same() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_same ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0, 655370 /* regdef:GPR32common */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 720906 /* regdef:GPR32common */, def %0, 720906 /* regdef:GPR32common */, def %1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY]], [[COPY1]] @@ -96,7 +96,7 @@ define double @test_multiple_register_outputs_mixed() #0 { ; CHECK-LABEL: name: test_multiple_register_outputs_mixed ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %0, 1507338 /* regdef:FPR64 */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 720906 /* regdef:GPR32common */, def %0, 1572874 /* regdef:FPR64 */, def %1 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY %0 ; CHECK: 
[[COPY1:%[0-9]+]]:_(s64) = COPY %1 ; CHECK: $d0 = COPY [[COPY1]](s64) @@ -125,7 +125,7 @@ ; CHECK: liveins: $x0 ; ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %1 + ; CHECK: INLINEASM &"mov ${0:w}, 32", 0 /* attdialect */, 720906 /* regdef:GPR32common */, def %1 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -139,12 +139,12 @@ define float @test_vector_output() nounwind { ; CHECK-LABEL: name: test_vector_output ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: INLINEASM &"fmov ${0}.2s, #1.0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $d14 - ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d14 - ; CHECK: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) - ; CHECK: $s0 = COPY [[EVEC]](s32) - ; CHECK: RET_ReallyLR implicit $s0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: INLINEASM &"fmov ${0}.2s, #1.0", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $d14 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d14 + ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<2 x s32>), [[C]](s64) + ; CHECK-NEXT: $s0 = COPY [[EVEC]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $s0 %1 = tail call <2 x float> asm sideeffect "fmov ${0}.2s, #1.0", "={v14}"() nounwind %2 = extractelement <2 x float> %1, i32 0 ret float %2 @@ -155,7 +155,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 42 ; CHECK: [[COPY:%[0-9]+]]:gpr64common = COPY [[C]](s64) - ; CHECK: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 1703945 /* reguse:GPR64common */, [[COPY]] + ; CHECK: INLINEASM &"mov x0, $0", 1 /* sideeffect attdialect */, 1769481 /* reguse:GPR64common */, %0 ; CHECK: RET_ReallyLR call void asm sideeffect "mov x0, 
$0", "r"(i64 42) ret void @@ -165,10 +165,10 @@ define i32 @test_boolean_imm_ext() { ; CHECK-LABEL: name: test_boolean_imm_ext ; CHECK: bb.1.entry: - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK: INLINEASM &"#TEST 42 + ${0:c} - .\0A\09", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 1 - ; CHECK: $w0 = COPY [[C]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: INLINEASM &"#TEST 42 + ${0:c} - .\0A\09", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 1 + ; CHECK-NEXT: $w0 = COPY [[C]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 entry: tail call void asm sideeffect "#TEST 42 + ${0:c} - .\0A\09", "i"(i1 true) ret i32 1 @@ -177,8 +177,8 @@ define void @test_input_imm() { ; CHECK-LABEL: name: test_input_imm ; CHECK: bb.1 (%ir-block.0): - ; CHECK: INLINEASM &"mov x0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: INLINEASM &"mov x0, $0", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 42 + ; CHECK-NEXT: RET_ReallyLR call void asm sideeffect "mov x0, $0", "i"(i64 42) ret void } @@ -189,7 +189,7 @@ ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]](p0) - ; CHECK: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 655370 /* regdef:GPR32common */, def %1, 1703945 /* reguse:GPR64common */, [[COPY1]] + ; CHECK: INLINEASM &"ldtrb ${0:w}, [$1]", 0 /* attdialect */, 720906 /* regdef:GPR32common */, def %1, 1769481 /* reguse:GPR64common */, %2 ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32) ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s8) @@ -205,7 +205,7 @@ ; CHECK: bb.1 (%ir-block.0): ; CHECK: liveins: $x0 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 - ; CHECK: INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 655370 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, [[COPY]](p0) + ; CHECK: 
INLINEASM &"ldr $0, $1", 8 /* mayload attdialect */, 720906 /* regdef:GPR32common */, def %1, 262158 /* mem:m */, %0(p0) ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %1 ; CHECK: $w0 = COPY [[COPY1]](s32) ; CHECK: RET_ReallyLR implicit $w0 @@ -219,7 +219,7 @@ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 655369 /* reguse:GPR32common */, [[COPY]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 720906 /* regdef:GPR32common */, def %0, 720905 /* reguse:GPR32common */, [[COPY]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) @@ -235,7 +235,7 @@ ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16) ; CHECK: [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 720906 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]] ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY %0 ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16) @@ -248,11 +248,11 @@ define i64 @test_input_with_matching_constraint_to_physical_register() { ; CHECK-LABEL: name: test_input_with_matching_constraint_to_physical_register ; CHECK: bb.1 (%ir-block.0): - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: INLINEASM &"", 0 /* attdialect */, 10 /* regdef */, implicit-def $x2, 2147483657 /* reguse tiedto:$0 */, [[C]](tied-def 3)(s64) - ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: $x0 = COPY [[COPY]](s64) - ; CHECK: 
RET_ReallyLR implicit $x0 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: INLINEASM &"", 0 /* attdialect */, 10 /* regdef */, implicit-def $x2, 2147483657 /* reguse tiedto:$0 */, [[C]](tied-def 3)(s64) + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK-NEXT: $x0 = COPY [[COPY]](s64) + ; CHECK-NEXT: RET_ReallyLR implicit $x0 %1 = tail call i64 asm "", "={x2},0"(i64 0) ret i64 %1 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-unwind-inline-asm.ll @@ -69,7 +69,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64common = COPY [[DEF]](p0) - ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1703945 /* reguse:GPR64common */, [[COPY]] + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, 1769481 /* reguse:GPR64common */, %0 ; CHECK-NEXT: G_BR %bb.2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.a: diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/regbank-inlineasm.mir @@ -10,8 +10,8 @@ bb.1: ; CHECK-LABEL: name: inlineasm_memory_clobber ; CHECK: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */ - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ - ; CHECK: RET_ReallyLR + ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: RET_ReallyLR INLINEASM &"", 25 INLINEASM &"", 1 RET_ReallyLR @@ -26,7 +26,7 @@ bb.1: ; CHECK-LABEL: name: inlineasm_register_clobber ; CHECK: INLINEASM &"", 25 /* sideeffect mayload maystore attdialect */, 12 /* clobber */, implicit-def early-clobber $d0 - ; CHECK: RET_ReallyLR + ; 
CHECK-NEXT: RET_ReallyLR INLINEASM &"", 25, 12, implicit-def early-clobber $d0 RET_ReallyLR ... @@ -40,9 +40,9 @@ bb.1: ; CHECK-LABEL: name: inlineasm_phys_reg_output ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $w0 - ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 - ; CHECK: $w0 = COPY [[COPY]](s32) - ; CHECK: RET_ReallyLR implicit $w0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(s32) = COPY $w0 + ; CHECK-NEXT: $w0 = COPY [[COPY]](s32) + ; CHECK-NEXT: RET_ReallyLR implicit $w0 INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 10 /* regdef */, implicit-def $w0 %0:_(s32) = COPY $w0 $w0 = COPY %0(s32) @@ -57,7 +57,7 @@ body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_reg_output - ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %0 + ; CHECK: INLINEASM &"mov ${0:w}, 7", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_GPR32arg */, def %0 ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK: $w0 = COPY [[COPY]](s32) ; CHECK: RET_ReallyLR implicit $w0 @@ -75,7 +75,7 @@ body: | bb.1: ; CHECK-LABEL: name: inlineasm_virt_mixed_types - ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_MatrixIndexGPR32_12_15 */, def %0, 2162698 /* regdef:FIXED_REGS */, def %1 + ; CHECK: INLINEASM &"mov $0, #0; mov $1, #0", 0 /* attdialect */, 1310730 /* regdef:WSeqPairsClass_with_sube32_in_GPR32arg */, def %0, 2162698 /* regdef:GPR64arg */, def %1 ; CHECK: [[COPY:%[0-9]+]]:gpr(s32) = COPY %0 ; CHECK: [[COPY1:%[0-9]+]]:fpr(s64) = COPY %1 ; CHECK: $d0 = COPY [[COPY1]](s64) diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -118,6 +118,7 @@ ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: AArch64 Local Dynamic TLS Access Clean-up ; CHECK-NEXT: 
Finalize ISel and expand pseudo-instructions +; CHECK-NEXT: SME Peephole Optimization pass ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Early Tail Duplication ; CHECK-NEXT: Optimize machine instruction PHIs diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount-O3.ll @@ -0,0 +1,39 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -O3 -mtriple=aarch64 -mattr=+sme -S < %s | FileCheck %s + +; Test PHI nodes are allowed with the aarch64_svcount type. +define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val0, aarch64_svcount %val1, ptr %iptr, ptr %pptr, i64 %N) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: entry: +; CHECK-NEXT: store i64 0, ptr [[IPTR:%.*]], align 4 +; CHECK-NEXT: store aarch64_svcount [[VAL0:%.*]], ptr [[PPTR:%.*]], align 2 +; CHECK-NEXT: [[I1_PEEL:%.*]] = icmp eq i64 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[I1_PEEL]], label [[LOOP_EXIT:%.*]], label [[LOOP_BODY:%.*]] +; CHECK: loop.body: +; CHECK-NEXT: [[IND:%.*]] = phi i64 [ [[IND_NEXT:%.*]], [[LOOP_BODY]] ], [ 1, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[IPTR_GEP:%.*]] = getelementptr i64, ptr [[IPTR]], i64 [[IND]] +; CHECK-NEXT: store i64 [[IND]], ptr [[IPTR_GEP]], align 4 +; CHECK-NEXT: store aarch64_svcount [[VAL1:%.*]], ptr [[PPTR]], align 2 +; CHECK-NEXT: [[IND_NEXT]] = add i64 [[IND]], 1 +; CHECK-NEXT: [[I1:%.*]] = icmp eq i64 [[IND]], [[N]] +; CHECK-NEXT: br i1 [[I1]], label [[LOOP_EXIT]], label [[LOOP_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: loop.exit: +; CHECK-NEXT: [[PHI_LCSSA:%.*]] = phi aarch64_svcount [ [[VAL0]], [[ENTRY]] ], [ [[VAL1]], [[LOOP_BODY]] ] +; CHECK-NEXT: ret aarch64_svcount [[PHI_LCSSA]] +; +entry: + br label %loop.body + +loop.body: + %ind = phi i64 [0, %entry], [%ind.next, %loop.body] + %phi = phi aarch64_svcount [%val0, %entry], [%val1, %loop.body] + %iptr.gep
= getelementptr i64, ptr %iptr, i64 %ind + store i64 %ind, ptr %iptr.gep + store aarch64_svcount %phi, ptr %pptr + %ind.next = add i64 %ind, 1 + %i1 = icmp eq i64 %ind, %N + br i1 %i1, label %loop.exit, label %loop.body + +loop.exit: + ret aarch64_svcount %phi +} diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll @@ -0,0 +1,146 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECKO0 +; RUN: llc -O3 -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s --check-prefixes=CHECK,CHECKO3 + +; +; Test simple loads, stores and return. +; +define aarch64_svcount @test_load(ptr %ptr) nounwind { +; CHECK-LABEL: test_load: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr p0, [x0] +; CHECK-NEXT: ret + %res = load aarch64_svcount, ptr %ptr + ret aarch64_svcount %res +} + +define void @test_store(ptr %ptr, aarch64_svcount %val) nounwind { +; CHECK-LABEL: test_store: +; CHECK: // %bb.0: +; CHECK-NEXT: str p0, [x0] +; CHECK-NEXT: ret + store aarch64_svcount %val, ptr %ptr + ret void +} + +define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind { +; CHECKO0-LABEL: test_alloca_store_reload: +; CHECKO0: // %bb.0: +; CHECKO0-NEXT: sub sp, sp, #16 +; CHECKO0-NEXT: add x8, sp, #14 +; CHECKO0-NEXT: str p0, [x8] +; CHECKO0-NEXT: ldr p0, [x8] +; CHECKO0-NEXT: add sp, sp, #16 +; CHECKO0-NEXT: ret +; +; CHECKO3-LABEL: test_alloca_store_reload: +; CHECKO3: // %bb.0: +; CHECKO3-NEXT: sub sp, sp, #16 +; CHECKO3-NEXT: add x8, sp, #14 +; CHECKO3-NEXT: str p0, [x8] +; CHECKO3-NEXT: add sp, sp, #16 +; CHECKO3-NEXT: ret + %ptr = alloca aarch64_svcount, align 1 + store aarch64_svcount %val, ptr %ptr + %res = load aarch64_svcount, ptr %ptr + ret aarch64_svcount %res +} + +; +; Test passing as arguments (from 
perspective of callee) +; + +define aarch64_svcount @test_return_arg1(aarch64_svcount %arg0, aarch64_svcount %arg1) nounwind { +; CHECK-LABEL: test_return_arg1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov p0.b, p1.b +; CHECK-NEXT: ret + ret aarch64_svcount %arg1 +} + +define aarch64_svcount @test_return_arg4(aarch64_svcount %arg0, aarch64_svcount %arg1, aarch64_svcount %arg2, aarch64_svcount %arg3, aarch64_svcount %arg4) nounwind { +; CHECK-LABEL: test_return_arg4: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr p0, [x0] +; CHECK-NEXT: ret + ret aarch64_svcount %arg4 +} + +; +; Test passing as arguments (from perspective of caller) +; + +declare void @take_svcount_1(aarch64_svcount %arg) +define void @test_pass_1arg(aarch64_svcount %arg) nounwind { +; CHECK-LABEL: test_pass_1arg: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl take_svcount_1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @take_svcount_1(aarch64_svcount %arg) + ret void +} + +declare void @take_svcount_5(aarch64_svcount %arg0, aarch64_svcount %arg1, aarch64_svcount %arg2, aarch64_svcount %arg3, aarch64_svcount %arg4) +define void @test_pass_5args(aarch64_svcount %arg) nounwind { +; CHECKO0-LABEL: test_pass_5args: +; CHECKO0: // %bb.0: +; CHECKO0-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECKO0-NEXT: addvl sp, sp, #-1 +; CHECKO0-NEXT: mov p3.b, p0.b +; CHECKO0-NEXT: str p3, [sp, #7, mul vl] +; CHECKO0-NEXT: addpl x0, sp, #7 +; CHECKO0-NEXT: mov p0.b, p3.b +; CHECKO0-NEXT: mov p1.b, p3.b +; CHECKO0-NEXT: mov p2.b, p3.b +; CHECKO0-NEXT: bl take_svcount_5 +; CHECKO0-NEXT: addvl sp, sp, #1 +; CHECKO0-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECKO0-NEXT: ret +; +; CHECKO3-LABEL: test_pass_5args: +; CHECKO3: // %bb.0: +; CHECKO3-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECKO3-NEXT: addvl sp, sp, #-1 +; CHECKO3-NEXT: addpl x0, sp, #7 +; CHECKO3-NEXT: mov p1.b, p0.b +; CHECKO3-NEXT: mov p2.b, p0.b +; CHECKO3-NEXT: mov p3.b, p0.b +; CHECKO3-NEXT: str p0, [sp, #7, mul vl] +; CHECKO3-NEXT: bl take_svcount_5 +; CHECKO3-NEXT: addvl sp, sp, #1 +; CHECKO3-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECKO3-NEXT: ret + call void @take_svcount_5(aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg, aarch64_svcount %arg) + ret void +} + +; Test code-generation of aarch64_svcount being used in a select to support e.g. +; return k < 42 ? x : y; +; where x and y are of type svcount_t. +define aarch64_svcount @foo(i32 %k, aarch64_svcount %x, aarch64_svcount %y) { +; CHECKO0-LABEL: foo: +; CHECKO0: // %bb.0: +; CHECKO0-NEXT: mov p2.b, p1.b +; CHECKO0-NEXT: mov p1.b, p0.b +; CHECKO0-NEXT: subs w8, w0, #42 +; CHECKO0-NEXT: cset w8, lt +; CHECKO0-NEXT: sbfx w9, w8, #0, #1 +; CHECKO0-NEXT: mov w8, wzr +; CHECKO0-NEXT: whilelo p0.b, w8, w9 +; CHECKO0-NEXT: sel p0.b, p0, p1.b, p2.b +; CHECKO0-NEXT: ret +; +; CHECKO3-LABEL: foo: +; CHECKO3: // %bb.0: +; CHECKO3-NEXT: cmp w0, #42 +; CHECKO3-NEXT: cset w8, lt +; CHECKO3-NEXT: sbfx w8, w8, #0, #1 +; CHECKO3-NEXT: whilelo p2.b, wzr, w8 +; CHECKO3-NEXT: sel p0.b, p2, p0.b, p1.b +; CHECKO3-NEXT: ret + %cmp = icmp slt i32 %k, 42 + %x.y = select i1 %cmp, aarch64_svcount %x, aarch64_svcount %y + ret aarch64_svcount %x.y +} diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -0,0 +1,535 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -enable-aarch64-sme-peephole-opt=false -mtriple=aarch64-linux-gnu -mattr=+sme < %s \ +; RUN: | FileCheck %s
--check-prefixes=CHECK-COMMON,CHECK-FISEL +; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -enable-aarch64-sme-peephole-opt=false -mtriple=aarch64-linux-gnu -mattr=+sme < %s \ +; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL + + +declare double @streaming_callee(double ) "aarch64_pstate_sm_enabled" +declare double @nonstreaming_callee(double ) + +define double @nonstreaming_caller(double %x) nounwind noinline optnone { +; CHECK-FISEL-LABEL: nonstreaming_caller: +; CHECK-FISEL: // %bb.0: // %entry +; CHECK-FISEL-NEXT: sub sp, sp, #96 +; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: smstart sm +; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: bl streaming_callee +; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: smstop sm +; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0 +; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0] +; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: fadd d0, d1, d0 +; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: add sp, sp, #96 +; CHECK-FISEL-NEXT: ret +; +; CHECK-GISEL-LABEL: nonstreaming_caller: +; CHECK-GISEL: // %bb.0: // %entry +; CHECK-GISEL-NEXT: sub sp, sp, #96 +; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; 
CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: smstart sm +; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: bl streaming_callee +; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: smstop sm +; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 +; CHECK-GISEL-NEXT: fmov d0, x8 +; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: fadd d0, d1, d0 +; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: add sp, sp, #96 +; CHECK-GISEL-NEXT: ret +entry: + %call = call double @streaming_callee(double %x) "aarch64_pstate_sm_enabled" + %add = fadd double %call, 4.200000e+01 + ret double %add +} + + +define double @streaming_caller(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" { +; CHECK-FISEL-LABEL: streaming_caller: +; CHECK-FISEL: // %bb.0: // %entry +; CHECK-FISEL-NEXT: sub sp, sp, #96 +; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: smstop sm +; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload 
+; CHECK-FISEL-NEXT: bl nonstreaming_callee +; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: smstart sm +; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0 +; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] +; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: fadd d0, d1, d0 +; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-FISEL-NEXT: add sp, sp, #96 +; CHECK-FISEL-NEXT: ret +; +; CHECK-GISEL-LABEL: streaming_caller: +; CHECK-GISEL: // %bb.0: // %entry +; CHECK-GISEL-NEXT: sub sp, sp, #96 +; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: smstop sm +; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: bl nonstreaming_callee +; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-GISEL-NEXT: smstart sm +; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 +; CHECK-GISEL-NEXT: fmov d0, x8 +; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: fadd d0, d1, d0 +; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded 
Reload +; CHECK-GISEL-NEXT: add sp, sp, #96 +; CHECK-GISEL-NEXT: ret +entry: + %call = call double @nonstreaming_callee(double %x) + %add = fadd double %call, 4.200000e+01 + ret double %add +} + +;; Check ZA state + +define double @za_new_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{ +; CHECK-FISEL-LABEL: za_new_caller_to_za_none_callee: +; CHECK-FISEL: // %bb.0: // %prelude +; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-FISEL-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: mov x29, sp +; CHECK-FISEL-NEXT: sub sp, sp, #16 +; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-FISEL-NEXT: cbz x8, .LBB2_2 +; CHECK-FISEL-NEXT: // %bb.1: // %save.za +; CHECK-FISEL-NEXT: bl __arm_tpidr2_save +; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-FISEL-NEXT: .LBB2_2: // %entry +; CHECK-FISEL-NEXT: smstart za +; CHECK-FISEL-NEXT: mov x19, sp +; CHECK-FISEL-NEXT: rdsvl x8, #1 +; CHECK-FISEL-NEXT: mul x8, x8, x8 +; CHECK-FISEL-NEXT: mov x9, sp +; CHECK-FISEL-NEXT: subs x9, x9, x8 +; CHECK-FISEL-NEXT: mov sp, x9 +; CHECK-FISEL-NEXT: stur x9, [x29, #-16] +; CHECK-FISEL-NEXT: sturh w8, [x29, #-8] +; CHECK-FISEL-NEXT: sub x8, x29, #16 +; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x8 +; CHECK-FISEL-NEXT: bl nonstreaming_callee +; CHECK-FISEL-NEXT: smstart za +; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-FISEL-NEXT: sub x0, x29, #16 +; CHECK-FISEL-NEXT: cbz x8, .LBB2_3 +; CHECK-FISEL-NEXT: b .LBB2_4 +; CHECK-FISEL-NEXT: .LBB2_3: // %entry +; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore +; CHECK-FISEL-NEXT: b .LBB2_4 +; CHECK-FISEL-NEXT: .LBB2_4: // %entry +; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-FISEL-NEXT: mov sp, x19 +; CHECK-FISEL-NEXT: adrp x8, .LCPI2_0 +; CHECK-FISEL-NEXT: ldr d1, [x8, :lo12:.LCPI2_0] +; CHECK-FISEL-NEXT: fadd d0, d0, d1 +; CHECK-FISEL-NEXT: smstop za +; CHECK-FISEL-NEXT: mov sp, x29 +; CHECK-FISEL-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldp x29, 
x30, [sp], #32 // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ret +; +; CHECK-GISEL-LABEL: za_new_caller_to_za_none_callee: +; CHECK-GISEL: // %bb.0: // %prelude +; CHECK-GISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-GISEL-NEXT: mov x29, sp +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: rdsvl x8, #1 +; CHECK-GISEL-NEXT: mov x9, sp +; CHECK-GISEL-NEXT: msub x8, x8, x8, x9 +; CHECK-GISEL-NEXT: mov sp, x8 +; CHECK-GISEL-NEXT: stur x8, [x29, #-16] +; CHECK-GISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-GISEL-NEXT: cbz x8, .LBB2_2 +; CHECK-GISEL-NEXT: b .LBB2_1 +; CHECK-GISEL-NEXT: .LBB2_1: // %save.za +; CHECK-GISEL-NEXT: bl __arm_tpidr2_save +; CHECK-GISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-GISEL-NEXT: b .LBB2_2 +; CHECK-GISEL-NEXT: .LBB2_2: // %entry +; CHECK-GISEL-NEXT: smstart za +; CHECK-GISEL-NEXT: rdsvl x8, #1 +; CHECK-GISEL-NEXT: mul w8, w8, w8 +; CHECK-GISEL-NEXT: sturh w8, [x29, #-8] +; CHECK-GISEL-NEXT: sub x8, x29, #16 +; CHECK-GISEL-NEXT: msr TPIDR2_EL0, x8 +; CHECK-GISEL-NEXT: bl nonstreaming_callee +; CHECK-GISEL-NEXT: smstart za +; CHECK-GISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-GISEL-NEXT: sub x0, x29, #16 +; CHECK-GISEL-NEXT: cbz x8, .LBB2_3 +; CHECK-GISEL-NEXT: b .LBB2_4 +; CHECK-GISEL-NEXT: .LBB2_3: // %entry +; CHECK-GISEL-NEXT: bl __arm_tpidr2_restore +; CHECK-GISEL-NEXT: b .LBB2_4 +; CHECK-GISEL-NEXT: .LBB2_4: // %entry +; CHECK-GISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 +; CHECK-GISEL-NEXT: fmov d1, x8 +; CHECK-GISEL-NEXT: fadd d0, d0, d1 +; CHECK-GISEL-NEXT: smstop za +; CHECK-GISEL-NEXT: mov sp, x29 +; CHECK-GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ret +entry: + %call = call double @nonstreaming_callee(double %x) + %add = fadd double %call, 4.200000e+01 + ret double %add; +} + +define double @za_shared_caller_to_za_none(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{ +; CHECK-FISEL-LABEL: za_shared_caller_to_za_none: +; 
CHECK-FISEL: // %bb.0: // %entry +; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-FISEL-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-FISEL-NEXT: mov x29, sp +; CHECK-FISEL-NEXT: sub sp, sp, #16 +; CHECK-FISEL-NEXT: mov x19, sp +; CHECK-FISEL-NEXT: rdsvl x8, #1 +; CHECK-FISEL-NEXT: mul x8, x8, x8 +; CHECK-FISEL-NEXT: mov x9, sp +; CHECK-FISEL-NEXT: subs x9, x9, x8 +; CHECK-FISEL-NEXT: mov sp, x9 +; CHECK-FISEL-NEXT: stur x9, [x29, #-16] +; CHECK-FISEL-NEXT: sturh w8, [x29, #-8] +; CHECK-FISEL-NEXT: sub x8, x29, #16 +; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x8 +; CHECK-FISEL-NEXT: bl nonstreaming_callee +; CHECK-FISEL-NEXT: smstart za +; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-FISEL-NEXT: sub x0, x29, #16 +; CHECK-FISEL-NEXT: cbz x8, .LBB3_1 +; CHECK-FISEL-NEXT: b .LBB3_2 +; CHECK-FISEL-NEXT: .LBB3_1: // %entry +; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore +; CHECK-FISEL-NEXT: b .LBB3_2 +; CHECK-FISEL-NEXT: .LBB3_2: // %entry +; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-FISEL-NEXT: mov sp, x19 +; CHECK-FISEL-NEXT: adrp x8, .LCPI3_0 +; CHECK-FISEL-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] +; CHECK-FISEL-NEXT: fadd d0, d0, d1 +; CHECK-FISEL-NEXT: mov sp, x29 +; CHECK-FISEL-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-FISEL-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-FISEL-NEXT: ret +; +; CHECK-GISEL-LABEL: za_shared_caller_to_za_none: +; CHECK-GISEL: // %bb.0: // %entry +; CHECK-GISEL-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-GISEL-NEXT: mov x29, sp +; CHECK-GISEL-NEXT: sub sp, sp, #16 +; CHECK-GISEL-NEXT: rdsvl x8, #1 +; CHECK-GISEL-NEXT: mul x8, x8, x8 +; CHECK-GISEL-NEXT: mov x9, sp +; CHECK-GISEL-NEXT: subs x9, x9, x8 +; CHECK-GISEL-NEXT: mov sp, x9 +; CHECK-GISEL-NEXT: stur x9, [x29, #-16] +; CHECK-GISEL-NEXT: sturh w8, [x29, #-8] +; CHECK-GISEL-NEXT: sub x8, x29, #16 +; CHECK-GISEL-NEXT: msr TPIDR2_EL0, x8 +; CHECK-GISEL-NEXT: bl nonstreaming_callee +; CHECK-GISEL-NEXT: smstart za +; CHECK-GISEL-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-GISEL-NEXT: sub x0, x29, #16 +; CHECK-GISEL-NEXT: cbz x8, .LBB3_1 +; CHECK-GISEL-NEXT: b .LBB3_2 +; CHECK-GISEL-NEXT: .LBB3_1: // %entry +; CHECK-GISEL-NEXT: bl __arm_tpidr2_restore +; CHECK-GISEL-NEXT: b .LBB3_2 +; CHECK-GISEL-NEXT: .LBB3_2: // %entry +; CHECK-GISEL-NEXT: msr TPIDR2_EL0, xzr +; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 +; CHECK-GISEL-NEXT: fmov d1, x8 +; CHECK-GISEL-NEXT: fadd d0, d0, d1 +; CHECK-GISEL-NEXT: mov sp, x29 +; CHECK-GISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-GISEL-NEXT: ret +entry: + %call = call double @nonstreaming_callee(double %x) + %add = fadd double %call, 4.200000e+01 + ret double %add; +} + + +define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" { +; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #88] // 8-byte Folded 
Reload +; CHECK-COMMON-NEXT: bl nonstreaming_callee +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 +; CHECK-COMMON-NEXT: fmov d0, x8 +; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: fadd d0, d1, d0 +; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret + %call = call double @nonstreaming_callee(double %x); + %add = fadd double %call, 4.200000e+01 + ret double %add; +} + +define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noinline optnone { +; CHECK-COMMON-LABEL: normal_caller_to_locally_streaming_callee: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-COMMON-NEXT: bl locally_streaming_caller_normal_callee +; CHECK-COMMON-NEXT: adrp x8, .LCPI5_0 +; CHECK-COMMON-NEXT: ldr d1, [x8, :lo12:.LCPI5_0] +; CHECK-COMMON-NEXT: fadd d0, d0, d1 +; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ret + %call = call double @locally_streaming_caller_normal_callee(double %x) "aarch64_pstate_sm_body"; + %add = fadd double %call, 4.200000e+01 + ret double %add; +} + + +;; Check locally streaming Caller with Invoke + +define i32 @locally_streaming_caller_normal_invoke(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" personality i32 1 { +; CHECK-COMMON-LABEL: locally_streaming_caller_normal_invoke: +; CHECK-COMMON: .Lfunc_begin0: +; CHECK-COMMON-NEXT: .cfi_startproc +; CHECK-COMMON-NEXT: // %bb.0: // %entry +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: .cfi_offset w19, -8 +; CHECK-COMMON-NEXT: .cfi_offset w30, -16 +; CHECK-COMMON-NEXT: .cfi_offset b8, -24 +; CHECK-COMMON-NEXT: .cfi_offset b9, -32 +; CHECK-COMMON-NEXT: .cfi_offset b10, -40 +; CHECK-COMMON-NEXT: .cfi_offset b11, -48 +; CHECK-COMMON-NEXT: .cfi_offset b12, -56 +; CHECK-COMMON-NEXT: .cfi_offset b13, -64 +; CHECK-COMMON-NEXT: .cfi_offset b14, -72 +; CHECK-COMMON-NEXT: .cfi_offset b15, -80 +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov w19, #15 +; CHECK-COMMON-NEXT: .Ltmp0: +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl nonstreaming_callee +; 
CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: .Ltmp1: +; CHECK-COMMON-NEXT: b .LBB6_2 +; CHECK-COMMON-NEXT: .LBB6_1: // %lpad +; CHECK-COMMON-NEXT: .Ltmp2: +; CHECK-COMMON-NEXT: mov x1, x0 +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: mov x0, x1 +; CHECK-COMMON-NEXT: bl __cxa_begin_catch +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: bl __cxa_end_catch +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov w19, #23 +; CHECK-COMMON-NEXT: b .LBB6_2 +; CHECK-COMMON-NEXT: .LBB6_2: // %return +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: mov w0, w19 +; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret +entry: + invoke void @nonstreaming_callee(double %x) to label %return unwind label %lpad +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +define i32 @locally_streaming_caller_streaming_invoke(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" personality i32 1 { +; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_invoke: +; CHECK-COMMON: .Lfunc_begin1: +; CHECK-COMMON-NEXT: .cfi_startproc +; CHECK-COMMON-NEXT: // %bb.0: // %entry +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: .cfi_def_cfa_offset 96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp 
d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: .cfi_offset w19, -8 +; CHECK-COMMON-NEXT: .cfi_offset w30, -16 +; CHECK-COMMON-NEXT: .cfi_offset b8, -24 +; CHECK-COMMON-NEXT: .cfi_offset b9, -32 +; CHECK-COMMON-NEXT: .cfi_offset b10, -40 +; CHECK-COMMON-NEXT: .cfi_offset b11, -48 +; CHECK-COMMON-NEXT: .cfi_offset b12, -56 +; CHECK-COMMON-NEXT: .cfi_offset b13, -64 +; CHECK-COMMON-NEXT: .cfi_offset b14, -72 +; CHECK-COMMON-NEXT: .cfi_offset b15, -80 +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov w19, #15 +; CHECK-COMMON-NEXT: .Ltmp3: +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl streaming_callee +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: .Ltmp4: +; CHECK-COMMON-NEXT: b .LBB7_2 +; CHECK-COMMON-NEXT: .LBB7_1: // %lpad +; CHECK-COMMON-NEXT: .Ltmp5: +; CHECK-COMMON-NEXT: mov x1, x0 +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: mov x0, x1 +; CHECK-COMMON-NEXT: bl __cxa_begin_catch +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: bl __cxa_end_catch +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov w19, #23 +; CHECK-COMMON-NEXT: b .LBB7_2 +; CHECK-COMMON-NEXT: .LBB7_2: // %return +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: mov w0, w19 +; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret +entry: + invoke void 
@streaming_callee(double %x) to label %return unwind label %lpad +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +declare ptr @__cxa_begin_catch(ptr) +declare void @__cxa_end_catch() + + +;; Check attribute in the call itself + +define void @locally_streaming_caller_streaminhg_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" { +; CHECK-COMMON-LABEL: locally_streaming_caller_streaminhg_callee_ptr: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: blr x0 +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret + call void %p() "aarch64_pstate_sm_enabled" + ret void +} + +define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone { +; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: blr x0 +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret + call void %p() "aarch64_pstate_sm_enabled" + ret void +} diff --git a/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode-llc.ll b/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode-llc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode-llc.ll @@ -0,0 +1,102 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -o - %S/sme-exceptions-with-streaming-mode.ll | FileCheck %s +; XFAIL: * +; CHECK-LABEL: no_za_streaming_enabled: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset b8, -24 +; CHECK-NEXT: .cfi_offset b9, -32 +; CHECK-NEXT: .cfi_offset b10, -40 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: .cfi_offset b12, -56 +; CHECK-NEXT: .cfi_offset b13, -64 +; CHECK-NEXT: .cfi_offset b14, -72 +; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w0, #15 +; CHECK-NEXT: .LBB0_2: // %return +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_3: // %lpad +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart sm +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: b .LBB0_2 +; +; CHECK-LABEL: no_za_streaming_compatible: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_offset b8, -24 +; CHECK-NEXT: .cfi_offset b9, -32 +; CHECK-NEXT: .cfi_offset b10, -40 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: .cfi_offset b12, -56 +; CHECK-NEXT: .cfi_offset b13, -64 +; CHECK-NEXT: .cfi_offset b14, -72 +; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbz x19, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB1_2: // %entry +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: tbz x19, #0, .LBB1_4 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_4: // %entry +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w0, #15 +; CHECK-NEXT: .LBB1_6: // %return +; CHECK-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_7: // %lpad +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: tbz x19, #0, .LBB1_9 +; CHECK-NEXT: // %bb.8: // %lpad +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_9: // %lpad +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: b .LBB1_6 diff 
--git a/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode.ll b/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-exceptions-with-streaming-mode.ll @@ -0,0 +1,89 @@ +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s + +; XFAIL: * + +declare i32 @normal_callee() + +; Simple try-catch with no ZA state. No lazy-save is required, but we must restart pstate.sm in the +; exception handler. The expected output below looks for this as a call to llvm.aarch64.sme.invoke.resume.pstatesm with a constant 1, placed directly after the landingpad in %lpad. NOTE(review): normal_callee is declared as returning i32 but is invoked as void in both functions of this file; confirm that is intended. + +define i32 @no_za_streaming_enabled() "aarch64_pstate_sm_enabled" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@no_za_streaming_enabled() #0 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: invoke void @normal_callee() +; CHECK-NEXT: to label %return unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 1) +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void @normal_callee() to label %return unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +; As above, but the function is streaming_compatible: the expected output calls llvm.aarch64.sme.get.pstatesm in the entry block and passes the zero-extended result to the resume intrinsic in %lpad instead of a constant. + +define i32 @no_za_streaming_compatible() "aarch64_pstate_sm_compatible" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@no_za_streaming_compatible() #1 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = 
call i1 @llvm.aarch64.sme.get.pstatesm() +; CHECK-NEXT: %pstate.sm.zext = zext i1 %0 to i64 +; CHECK-NEXT: invoke void @normal_callee() +; CHECK-NEXT: to label %return unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %1 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 %pstate.sm.zext) +; CHECK-NEXT: %2 = extractvalue { ptr, i32 } %1, 0 +; CHECK-NEXT: %3 = tail call ptr @__cxa_begin_catch(ptr %2) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void @normal_callee() to label %return unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +declare ptr @__cxa_begin_catch(ptr) +declare void @__cxa_end_catch() + +;. +; CHECK: attributes #0 = { "aarch64_expanded_pstate_za" "aarch64_pstate_sm_enabled" } +; CHECK: attributes #1 = { "aarch64_expanded_pstate_za" "aarch64_pstate_sm_compatible" } +; CHECK: attributes #2 = { nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #3 = { inaccessiblememonly nocallback nofree nosync nounwind readonly willreturn } +;.
diff --git a/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state-llc.ll b/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state-llc.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state-llc.ll @@ -0,0 +1,1304 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs -o - %S/sme-exceptions-with-za-state.ll | FileCheck %s + +; XFAIL: * + +; CHECK-LABEL: private_za_invoke: +; CHECK: .Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_offset b8, -24 +; CHECK-NEXT: .cfi_offset b9, -32 +; CHECK-NEXT: .cfi_offset b10, -40 +; CHECK-NEXT: .cfi_offset b11, -48 +; CHECK-NEXT: .cfi_offset b12, -56 +; CHECK-NEXT: .cfi_offset b13, -64 +; CHECK-NEXT: .cfi_offset b14, -72 +; CHECK-NEXT: .cfi_offset b15, -80 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-80] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-96] +; CHECK-NEXT: sturh w8, [x29, #-72] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #80 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl 
__arm_tpidr2_save +; CHECK-NEXT: .Ltmp0: +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .Ltmp1: +; CHECK-NEXT: // %bb.1: // %check.za +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB0_3 +; CHECK-NEXT: // %bb.2: // %restore.za4 +; CHECK-NEXT: sub x0, x29, #80 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB0_3: // %return +; CHECK-NEXT: mov w0, #15 +; CHECK-NEXT: .LBB0_4: // %return +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sub sp, x29, #64 +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_5: // %lpad +; CHECK-NEXT: .Ltmp2: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #80 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: sub x8, x29, #96 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: sturh w9, [x29, #-88] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: smstop sm +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart sm +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB0_7 +; CHECK-NEXT: // %bb.6: // %restore.za +; CHECK-NEXT: sub x0, x29, #96 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB0_7: // %resume +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB0_4 +; +; CHECK-LABEL: new_za_invoke: +; CHECK: .Lfunc_begin1: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp d15, d14, [sp, #-96]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-80] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-96] +; CHECK-NEXT: sturh w8, [x29, #-72] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #80 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: .Ltmp3: +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbz x19, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB1_2: // %entry +; CHECK-NEXT: bl snap_new +; CHECK-NEXT: tbz x19, #0, .LBB1_4 +; CHECK-NEXT: // %bb.3: // %entry +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_4: // %entry +; CHECK-NEXT: .Ltmp4: +; CHECK-NEXT: // %bb.5: // %check.za +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB1_7 +; CHECK-NEXT: // %bb.6: // %restore.za4 +; CHECK-NEXT: 
sub x0, x29, #80 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB1_7: // %return +; CHECK-NEXT: mov w0, #15 +; CHECK-NEXT: .LBB1_8: // %return +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sub sp, x29, #64 +; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB1_9: // %lpad +; CHECK-NEXT: .Ltmp5: +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: and x19, x8, #0x1 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: tbz x19, #0, .LBB1_11 +; CHECK-NEXT: // %bb.10: // %lpad +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_11: // %lpad +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #80 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: sub x8, x29, #96 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: sturh w9, [x29, #-88] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_sme_state +; CHECK-NEXT: and x19, x0, #0x1 +; CHECK-NEXT: tbz x19, #0, .LBB1_13 +; CHECK-NEXT: // %bb.12: // %lpad +; CHECK-NEXT: smstop sm +; CHECK-NEXT: .LBB1_13: // %lpad +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: tbz x19, #0, .LBB1_15 +; CHECK-NEXT: // %bb.14: // %lpad +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_15: // %lpad +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB1_17 +; CHECK-NEXT: // %bb.16: // %restore.za +; CHECK-NEXT: sub x0, x29, #96 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB1_17: // %resume +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB1_8 +; +; CHECK-LABEL: private_za_in_catch_block: +; CHECK: .Lfunc_begin2: +; CHECK-NEXT: 
.cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp6: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp7: +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w0, #15 +; CHECK-NEXT: .LBB2_2: // %return +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_3: // %lpad1 +; CHECK-NEXT: .Ltmp8: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: sub x19, x29, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB2_5 +; CHECK-NEXT: // %bb.4: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB2_5: // %resume +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: 
msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w8, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB2_7 +; CHECK-NEXT: // %bb.6: // %restore.za2 +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB2_7: // %resume1 +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB2_2 +; +; CHECK-LABEL: private_za_in_catch_block_from_new_za: +; CHECK: .Lfunc_begin3: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %prelude +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x9, xzr, #0xf +; CHECK-NEXT: madd x9, x8, x8, x9 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x10, x10, x9 +; CHECK-NEXT: mov sp, x10 +; CHECK-NEXT: stur x10, [x29, #-16] +; CHECK-NEXT: mrs x10, TPIDR2_EL0 +; CHECK-NEXT: cbz x10, .LBB3_2 +; CHECK-NEXT: // %bb.1: // %save.za +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .LBB3_2: // %entry +; CHECK-NEXT: smstart za +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x19, x10, #16 +; CHECK-NEXT: mov sp, x19 +; CHECK-NEXT: mov x11, sp +; CHECK-NEXT: sub x9, x11, x9 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x10, #-16] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp9: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp10: +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: mov w0, #15 
+; CHECK-NEXT: .LBB3_4: // %return +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB3_5: // %lpad1 +; CHECK-NEXT: .Ltmp11: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: strh w8, [x19, #8] +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB3_7 +; CHECK-NEXT: // %bb.6: // %restore.za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB3_7: // %resume +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: strh w8, [x19, #8] +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB3_9 +; CHECK-NEXT: // %bb.8: // %restore.za2 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB3_9: // %resume1 +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB3_4 +; +; CHECK-LABEL: call_after_catch_block: +; CHECK: .Lfunc_begin4: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp12: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp13: +; CHECK-NEXT: .LBB4_1: // %try.cont +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: b fizz_shared +; CHECK-NEXT: .LBB4_2: // %lpad1 +; CHECK-NEXT: .Ltmp14: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: b.ne .LBB4_10 +; CHECK-NEXT: // %bb.3: // %catch +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp15: +; CHECK-NEXT: bl bar_shared +; CHECK-NEXT: .Ltmp16: +; CHECK-NEXT: // %bb.4: // %invoke.cont +; CHECK-NEXT: sub x8, x29, #32 +; 
CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB4_6 +; CHECK-NEXT: // %bb.5: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB4_6: // %resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB4_1 +; CHECK-NEXT: .LBB4_7: // %lpad2 +; CHECK-NEXT: .Ltmp17: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp18: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp19: +; CHECK-NEXT: // %bb.8: // %check.za +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB4_10 +; CHECK-NEXT: // %bb.9: // %restore.za12 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB4_10: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB4_11: // %terminate.lpad +; CHECK-NEXT: .Ltmp20: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: multiple_catch: +; CHECK: .Lfunc_begin5: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp21: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp22: +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w19, #42 +; CHECK-NEXT: .LBB5_2: // %return +; CHECK-NEXT: mov w0, w19 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_3: // %lpad1 +; CHECK-NEXT: .Ltmp23: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: cmp w1, #2 +; CHECK-NEXT: b.ne .LBB5_6 +; CHECK-NEXT: // %bb.4: // %catch1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp29: +; CHECK-NEXT: bl bar_shared +; CHECK-NEXT: .Ltmp30: +; 
CHECK-NEXT: // %bb.5: +; CHECK-NEXT: mov w19, #42 +; CHECK-NEXT: b .LBB5_9 +; CHECK-NEXT: .LBB5_6: // %catch.fallthrough +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: b.ne .LBB5_16 +; CHECK-NEXT: // %bb.7: // %catch2 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp24: +; CHECK-NEXT: bl fizz_shared +; CHECK-NEXT: .Ltmp25: +; CHECK-NEXT: // %bb.8: +; CHECK-NEXT: mov w19, #23 +; CHECK-NEXT: .LBB5_9: // %return.sink.split +; CHECK-NEXT: sub x8, x29, #32 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB5_11 +; CHECK-NEXT: // %bb.10: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB5_11: // %resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB5_2 +; CHECK-NEXT: .LBB5_12: // %lpad2 +; CHECK-NEXT: .Ltmp26: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp27: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp28: +; CHECK-NEXT: b .LBB5_14 +; CHECK-NEXT: .LBB5_13: // %lpad3 +; CHECK-NEXT: .Ltmp31: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp32: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp33: +; CHECK-NEXT: .LBB5_14: // 
%check.za23 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB5_16 +; CHECK-NEXT: // %bb.15: // %eh.resume.sink.split +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB5_16: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB5_17: // %terminate.lpad +; CHECK-NEXT: .Ltmp34: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: try_throw: +; CHECK: .Lfunc_begin6: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: sub x10, x29, #32 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: bl __cxa_allocate_exception +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x9, TPIDR2_EL0 +; CHECK-NEXT: cbnz x9, .LBB6_2 +; CHECK-NEXT: // %bb.1: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_2: // %resume +; CHECK-NEXT: mov w9, #23 +; CHECK-NEXT: rdsvl x10, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: sturh w10, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: 
sub x19, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp35: +; CHECK-NEXT: adrp x1, :got:except +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: mov x2, xzr +; CHECK-NEXT: ldr x1, [x1, :got_lo12:except] +; CHECK-NEXT: bl __cxa_throw +; CHECK-NEXT: .Ltmp36: +; CHECK-NEXT: // %bb.3: // %check.za +; CHECK-NEXT: .LBB6_4: // %lpad +; CHECK-NEXT: .Ltmp37: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp38: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp39: +; CHECK-NEXT: // %bb.5: // %invoke.cont +; CHECK-NEXT: sub x8, x29, #32 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB6_7 +; CHECK-NEXT: // %bb.6: // %restore.za2 +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_7: // %resume1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: b bar_shared +; CHECK-NEXT: .LBB6_8: // %lpad1 +; CHECK-NEXT: .Ltmp40: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp41: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp42: +; CHECK-NEXT: // %bb.9: // %check.za23 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB6_11 +; 
CHECK-NEXT: // %bb.10: // %restore.za24 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB6_11: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB6_12: // %terminate.lpad +; CHECK-NEXT: .Ltmp43: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: catch_throw: +; CHECK: .Lfunc_begin7: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp44: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp45: +; CHECK-NEXT: // %bb.1: // %try.cont +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: b bar_shared +; CHECK-NEXT: .LBB7_2: // %lpad1 +; CHECK-NEXT: .Ltmp46: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x19, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore 
+; CHECK-NEXT: sub x8, x29, #32 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_allocate_exception +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x9, TPIDR2_EL0 +; CHECK-NEXT: cbnz x9, .LBB7_4 +; CHECK-NEXT: // %bb.3: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB7_4: // %resume +; CHECK-NEXT: mov w9, #23 +; CHECK-NEXT: rdsvl x10, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: sturh w10, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp47: +; CHECK-NEXT: adrp x1, :got:except +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: mov x2, xzr +; CHECK-NEXT: ldr x1, [x1, :got_lo12:except] +; CHECK-NEXT: bl __cxa_throw +; CHECK-NEXT: .Ltmp48: +; CHECK-NEXT: // %bb.5: // %check.za +; CHECK-NEXT: .LBB7_6: // %lpad2 +; CHECK-NEXT: .Ltmp49: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp50: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp51: +; CHECK-NEXT: // %bb.7: // %check.za15 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB7_9 +; CHECK-NEXT: // %bb.8: // %restore.za16 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB7_9: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB7_10: // %terminate.lpad +; CHECK-NEXT: .Ltmp52: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: nested: +; CHECK: .Lfunc_begin8: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x19, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp53: +; CHECK-NEXT: bl foo_shared +; CHECK-NEXT: .Ltmp54: +; CHECK-NEXT: // %bb.1: // %invoke.cont1 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x19 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp56: +; CHECK-NEXT: bl bar_shared +; CHECK-NEXT: .Ltmp57: +; CHECK-NEXT: .LBB8_2: // %try.cont14 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB8_3: // %lpad2 +; CHECK-NEXT: .Ltmp58: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: cmp w1, #2 +; CHECK-NEXT: b.ne .LBB8_14 +; CHECK-NEXT: // %bb.4: // %catch1 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; 
CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp59: +; CHECK-NEXT: bl fizz_shared +; CHECK-NEXT: .Ltmp60: +; CHECK-NEXT: // %bb.5: // %invoke.cont2 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp64: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp65: +; CHECK-NEXT: // %bb.6: // %check.za +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB8_2 +; CHECK-NEXT: // %bb.7: // %restore.za16 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: b .LBB8_2 +; CHECK-NEXT: .LBB8_8: // %lpad4 +; CHECK-NEXT: .Ltmp66: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: b .LBB8_12 +; CHECK-NEXT: .LBB8_9: // %lpad3 +; CHECK-NEXT: .Ltmp61: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp62: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp63: +; CHECK-NEXT: // %bb.10: // %check.za23 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB8_12 +; CHECK-NEXT: // %bb.11: // %restore.za24 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB8_12: // %eh.cleanup +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB8_14 +; CHECK-NEXT: .LBB8_13: // %lpad1 +; CHECK-NEXT: .Ltmp55: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: .LBB8_14: // %catch2 +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; 
CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp67: +; CHECK-NEXT: bl buzz_shared +; CHECK-NEXT: .Ltmp68: +; CHECK-NEXT: // %bb.15: // %invoke.cont3 +; CHECK-NEXT: sub x8, x29, #32 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB8_17 +; CHECK-NEXT: // %bb.16: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB8_17: // %resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB8_2 +; CHECK-NEXT: .LBB8_18: // %lpad5 +; CHECK-NEXT: .Ltmp69: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp70: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp71: +; CHECK-NEXT: // %bb.19: // %check.za35 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB8_21 +; CHECK-NEXT: // %bb.20: // %restore.za36 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB8_21: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB8_22: // %terminate.lpad +; CHECK-NEXT: .Ltmp72: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: conditional_throw: +; CHECK: .Lfunc_begin9: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: tbnz w0, #0, .LBB9_3 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp73: +; CHECK-NEXT: bl bar_shared +; CHECK-NEXT: .Ltmp74: +; CHECK-NEXT: .LBB9_2: // %try.cont +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB9_3: // %if.then +; CHECK-NEXT: sub x9, x29, #32 +; CHECK-NEXT: mov w0, #4 +; CHECK-NEXT: sturh w8, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x9 +; CHECK-NEXT: bl __cxa_allocate_exception +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x9, TPIDR2_EL0 +; CHECK-NEXT: cbnz x9, .LBB9_5 +; CHECK-NEXT: // %bb.4: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB9_5: // %resume +; CHECK-NEXT: mov w9, #23 +; CHECK-NEXT: rdsvl x10, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: sturh w10, [x29, #-8] +; CHECK-NEXT: smstart za +; 
CHECK-NEXT: sub x9, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x9 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp75: +; CHECK-NEXT: adrp x1, :got:except +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: mov x2, xzr +; CHECK-NEXT: ldr x1, [x1, :got_lo12:except] +; CHECK-NEXT: bl __cxa_throw +; CHECK-NEXT: .Ltmp76: +; CHECK-NEXT: // %bb.6: // %check.za +; CHECK-NEXT: .LBB9_7: // %lpad +; CHECK-NEXT: .Ltmp77: +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp78: +; CHECK-NEXT: bl fizz_shared +; CHECK-NEXT: .Ltmp79: +; CHECK-NEXT: // %bb.8: // %invoke.cont2 +; CHECK-NEXT: sub x8, x29, #32 +; CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB9_10 +; CHECK-NEXT: // %bb.9: // %restore.za2 +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB9_10: // %resume1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB9_2 +; CHECK-NEXT: .LBB9_11: // %lpad1 +; CHECK-NEXT: .Ltmp80: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp81: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp82: +; CHECK-NEXT: // %bb.12: // %check.za27 +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB9_14 +; CHECK-NEXT: // %bb.13: // %restore.za28 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB9_14: // 
%eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB9_15: // %terminate.lpad +; CHECK-NEXT: .Ltmp83: +; CHECK-NEXT: bl __clang_call_terminate +; +; CHECK-LABEL: throw_in_signature: +; CHECK: .Lfunc_begin10: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: tbz w0, #0, .LBB10_3 +; CHECK-NEXT: // %bb.1: // %if.then +; CHECK-NEXT: .Ltmp86: +; CHECK-NEXT: bl bar_shared +; CHECK-NEXT: .Ltmp87: +; CHECK-NEXT: // %bb.2: +; CHECK-NEXT: mov w0, #42 +; CHECK-NEXT: b .LBB10_5 +; CHECK-NEXT: .LBB10_3: // %if.end +; CHECK-NEXT: .Ltmp84: +; CHECK-NEXT: bl fizz_shared +; CHECK-NEXT: .Ltmp85: +; CHECK-NEXT: // %bb.4: +; CHECK-NEXT: mov w0, #23 +; CHECK-NEXT: .LBB10_5: // %return +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB10_6: // %lpad +; CHECK-NEXT: .Ltmp88: +; CHECK-NEXT: bl __cxa_call_unexpected +; +; CHECK-LABEL: try_func: +; CHECK: .Lfunc_begin11: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: orr x10, xzr, #0xf +; CHECK-NEXT: madd x10, x8, x8, x10 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: and x10, x10, #0xfffffffffffffff0 +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: mov sp, x9 +; CHECK-NEXT: stur x9, [x29, #-32] +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x8, x29, #16 +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp89: +; CHECK-NEXT: bl fizz_shared +; CHECK-NEXT: .Ltmp90: +; CHECK-NEXT: .LBB11_1: // %try.cont +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB11_2: // %lpad +; CHECK-NEXT: .Ltmp91: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: cmp w1, #1 +; CHECK-NEXT: b.ne .LBB11_10 +; CHECK-NEXT: // %bb.3: // %catch +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl __cxa_begin_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: sub x20, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: .Ltmp92: +; CHECK-NEXT: bl buzz_shared +; CHECK-NEXT: .Ltmp93: +; CHECK-NEXT: // %bb.4: // %invoke.cont +; CHECK-NEXT: sub x8, x29, #32 +; 
CHECK-NEXT: rdsvl x9, #1 +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: sturh w9, [x29, #-24] +; CHECK-NEXT: msr TPIDR2_EL0, x8 +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB11_6 +; CHECK-NEXT: // %bb.5: // %restore.za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB11_6: // %resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: b .LBB11_1 +; CHECK-NEXT: .LBB11_7: // %lpad1 +; CHECK-NEXT: .Ltmp94: +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: smstart za +; CHECK-NEXT: msr TPIDR2_EL0, x20 +; CHECK-NEXT: bl __arm_tpidr2_save +; CHECK-NEXT: .Ltmp95: +; CHECK-NEXT: bl __cxa_end_catch +; CHECK-NEXT: .Ltmp96: +; CHECK-NEXT: // %bb.8: // %check.za +; CHECK-NEXT: smstart za +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB11_10 +; CHECK-NEXT: // %bb.9: // %restore.za12 +; CHECK-NEXT: sub x0, x29, #16 +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB11_10: // %eh.resume +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: smstop za +; CHECK-NEXT: mov x0, x19 +; CHECK-NEXT: bl _Unwind_Resume +; CHECK-NEXT: .LBB11_11: // %terminate.lpad +; CHECK-NEXT: .Ltmp97: +; CHECK-NEXT: bl __clang_call_terminate diff --git a/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state.ll b/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-exceptions-with-za-state.ll @@ -0,0 +1,1862 @@ +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s + +; XFAIL: * + +declare void @foo_shared() "aarch64_pstate_za_shared" +declare void @bar_shared() "aarch64_pstate_za_shared" +declare void @fizz_shared() "aarch64_pstate_za_shared" +declare void @buzz_shared() "aarch64_pstate_za_shared" + +declare void @snap_new() 
"aarch64_pstate_za_new" + +declare i32 @normal_callee() + +@except = external constant ptr +@invalid_argument = external constant ptr +@overflow_error = external constant ptr +@runtime_error = external constant ptr + +; Try/Catch with one invoke of a private ZA function. +; Lazy-save must be set up for the invoke and for the private callee. + +define i32 @private_za_invoke() "aarch64_pstate_za_shared" "aarch64_pstate_sm_enabled" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@private_za_invoke() #2 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live5 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc6 = trunc i64 %live5 to i16 +; CHECK-NEXT: %tpidr2.obj.live7 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc6, ptr %tpidr2.obj.live7, align 2 +; CHECK-NEXT: %tpi.int8 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int8) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @normal_callee() +; CHECK-NEXT: to label %check.za unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: 
%0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 1) +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %return +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr29 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp10 = icmp eq i64 %tpidr29, 0 +; CHECK-NEXT: br i1 %cmp10, label %restore.za4, label %return +; CHECK: restore.za4: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %resume ], [ 15, %check.za ], [ 15, %restore.za4 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void @normal_callee() to label %return unwind label %lpad 
+ +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +; Try/Catch with one invoke of a new ZA function. + +define i32 @new_za_invoke() "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@new_za_invoke() #3 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = call i1 @llvm.aarch64.sme.get.pstatesm() +; CHECK-NEXT: %pstate.sm.zext = zext i1 %0 to i64 +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live5 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc6 = trunc i64 %live5 to i16 +; CHECK-NEXT: %tpidr2.obj.live7 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc6, ptr %tpidr2.obj.live7, align 2 +; CHECK-NEXT: %tpi.int8 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int8) +; CHECK-NEXT: invoke void @snap_new() +; CHECK-NEXT: to label %check.za unwind label 
%lpad +; CHECK: lpad: +; CHECK-NEXT: %1 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 %pstate.sm.zext) +; CHECK-NEXT: %2 = extractvalue { ptr, i32 } %1, 0 +; CHECK-NEXT: %3 = tail call ptr @__cxa_begin_catch(ptr %2) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %return +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr29 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp10 = icmp eq i64 %tpidr29, 0 +; CHECK-NEXT: br i1 %cmp10, label %restore.za4, label %return +; CHECK: restore.za4: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %resume ], [ 15, %check.za ], [ 15, %restore.za4 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void 
@snap_new() to label %return unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad ], [ 15, %entry ] + ret i32 %retval +} + +; Try/Catch with one invoke of a shared ZA function and a private ZA call in the +; (unconditional) catch block. + +; int unconditional() { +; try { +; foo(); +; } catch (...) { +; normal_callee(); +; return 23; +; } +; return 15; +; } + +define i32 @private_za_in_catch_block() "aarch64_pstate_za_shared" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@private_za_in_catch_block() #4 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N9 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN10 = mul i64 %N9, %N9 +; CHECK-NEXT: %buffer11 = alloca i8, i64 %NN10, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer11, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live12 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc13 = trunc i64 %live12 to i16 +; CHECK-NEXT: %tpidr2.obj.live14 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc13, ptr %tpidr2.obj.live14, align 2 +; CHECK-NEXT: %tpi.int15 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void 
@llvm.aarch64.sme.set.tpidr2(i64 %tpi.int15) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %return unwind label %lpad1 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @normal_callee() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live3 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc4 = trunc i64 %live3 to i16 +; CHECK-NEXT: %tpidr2.obj.live5 = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc4, ptr %tpidr2.obj.live5, align 2 +; CHECK-NEXT: %tpi.int6 = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int6) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call 
void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr27 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp8 = icmp eq i64 %tpidr27, 0 +; CHECK-NEXT: br i1 %cmp8, label %restore.za2, label %resume1 +; CHECK: restore.za2: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume1 +; CHECK: resume1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %resume1 ], [ 15, %entry ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void @foo_shared() to label %return unwind label %lpad1 + +lpad1: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + call void @normal_callee() + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad1 ], [ 15, %entry ] + ret i32 %retval +} + +; Try/Catch with one invoke of a shared ZA function and a private ZA call in the +; (unconditional) catch block. + +; int unconditional() { +; try { +; foo(); +; } catch (...) 
{ +; normal_callee(); +; return 23; +; } +; return 15; +; } + +define i32 @private_za_in_catch_block_from_new_za() "aarch64_pstate_za_new" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@private_za_in_catch_block_from_new_za() #5 personality i32 1 { +; CHECK-NEXT: prelude: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N11 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN12 = mul i64 %N11, %N11 +; CHECK-NEXT: %buffer13 = alloca i8, i64 %NN12, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer13, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr29 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp10 = icmp ne i64 %tpidr29, 0 +; CHECK-NEXT: br i1 %cmp10, label %save.za, label %entry +; CHECK: save.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %entry +; CHECK: entry: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live14 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc15 = trunc i64 %live14 to i16 +; CHECK-NEXT: %tpidr2.obj.live16 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc15, ptr %tpidr2.obj.live16, align 2 +; CHECK-NEXT: %tpi.int17 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int17) +; CHECK-NEXT: call void 
@llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %return unwind label %lpad1 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @normal_callee() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live3 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc4 = trunc i64 %live3 to i16 +; CHECK-NEXT: %tpidr2.obj.live5 = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc4, ptr %tpidr2.obj.live5, align 2 +; CHECK-NEXT: %tpi.int6 = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int6) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr27 = call 
i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp8 = icmp eq i64 %tpidr27, 0 +; CHECK-NEXT: br i1 %cmp8, label %restore.za2, label %resume1 +; CHECK: restore.za2: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume1 +; CHECK: resume1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 23, %resume1 ], [ 15, %entry ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: ret i32 %retval +; +entry: + invoke void @foo_shared() to label %return unwind label %lpad1 + +lpad1: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + call void @normal_callee() + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 23, %lpad1 ], [ 15, %entry ] + ret i32 %retval +} + + +; +; Try/Catch with one catch clause. 
Set up & restore lazy-save +; +; void except_func() { +; try { +; foo(); +; } catch (const std::invalid_argument& e) { +; bar(); +; } +; fizz(); +; } + +define void @call_after_catch_block() "aarch64_pstate_za_shared" personality i32 0 { +; CHECK-LABEL: define {{[^@]+}}@call_after_catch_block() #4 personality i32 0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live4 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc5 = trunc i64 %live4 to i16 +; CHECK-NEXT: %tpidr2.obj.live6 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc5, ptr %tpidr2.obj.live6, align 2 +; CHECK-NEXT: %tpi.int7 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int7) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %try.cont unwind label %lpad1 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr @invalid_argument +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 1 +; CHECK-NEXT: %2 = tail 
call i32 @llvm.eh.typeid.for(ptr nonnull @invalid_argument) +; CHECK-NEXT: %matches = icmp eq i32 %1, %2 +; CHECK-NEXT: br i1 %matches, label %catch, label %eh.resume +; CHECK: catch: +; CHECK-NEXT: %3 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %4 = tail call ptr @__cxa_begin_catch(ptr %3) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live8 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc9 = trunc i64 %live8 to i16 +; CHECK-NEXT: %tpidr2.obj.live10 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc9, ptr %tpidr2.obj.live10, align 2 +; CHECK-NEXT: %tpi.int11 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int11) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @bar_shared() +; CHECK-NEXT: to label %invoke.cont unwind label %lpad2 +; CHECK: invoke.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void 
@llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %try.cont +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: tail call void @fizz_shared() +; CHECK-NEXT: ret void +; CHECK: lpad2: +; CHECK-NEXT: %5 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live13 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc14 = trunc i64 %live13 to i16 +; CHECK-NEXT: %tpidr2.obj.live15 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc14, ptr %tpidr2.obj.live15, align 2 +; CHECK-NEXT: %tpi.int16 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int16) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za unwind label %terminate.lpad +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr217 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp18 = icmp eq i64 %tpidr217, 0 +; CHECK-NEXT: br i1 %cmp18, label %restore.za12, label %eh.resume +; CHECK: restore.za12: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: %lpad.val6.merged = phi { ptr, i32 } [ %0, %lpad1 ], [ %5, %check.za ], [ %5, %restore.za12 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %lpad.val6.merged +; CHECK: terminate.lpad: +; CHECK-NEXT: %6 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %7 = extractvalue { ptr, i32 } %6, 0 +; CHECK-NEXT: tail call void 
@__clang_call_terminate(ptr %7) +; CHECK-NEXT: unreachable +; + +entry: + invoke void @foo_shared() to label %try.cont unwind label %lpad1 + +lpad1: + %0 = landingpad { ptr, i32 } catch ptr @invalid_argument + %1 = extractvalue { ptr, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @invalid_argument) + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: + %3 = extractvalue { ptr, i32 } %0, 0 + %4 = tail call ptr @__cxa_begin_catch(ptr %3) + invoke void @bar_shared() to label %invoke.cont unwind label %lpad2 + +invoke.cont: + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + tail call void @fizz_shared() + ret void + +lpad2: + %5 = landingpad { ptr, i32 } cleanup + invoke void @__cxa_end_catch() to label %eh.resume unwind label %terminate.lpad + +eh.resume: + %lpad.val6.merged = phi { ptr, i32 } [ %0, %lpad1 ], [ %5, %lpad2 ] + resume { ptr, i32 } %lpad.val6.merged + +terminate.lpad: + %6 = landingpad { ptr, i32 } catch ptr null + %7 = extractvalue { ptr, i32 } %6, 0 + tail call void @__clang_call_terminate(ptr %7) + unreachable +} + +; +; Try/Catch with two catch clauses. 
Set up & restore lazy-save +; +; int multiple_catch() { +; try { +; foo(); +; } catch (const std::overflow_error& e) { +; bar(); +; } catch (const std::runtime_error& e) { +; fizz(); +; return 23; +; } +; return 42; +; } + +define i32 @multiple_catch() "aarch64_pstate_za_shared" personality i32 0 { +; CHECK-LABEL: define {{[^@]+}}@multiple_catch() #4 personality i32 0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live4 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc5 = trunc i64 %live4 to i16 +; CHECK-NEXT: %tpidr2.obj.live6 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc5, ptr %tpidr2.obj.live6, align 2 +; CHECK-NEXT: %tpi.int7 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int7) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %return unwind label %lpad1 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr @overflow_error +; CHECK-NEXT: catch ptr 
@runtime_error +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = extractvalue { ptr, i32 } %0, 1 +; CHECK-NEXT: %3 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @overflow_error) +; CHECK-NEXT: %matches = icmp eq i32 %2, %3 +; CHECK-NEXT: br i1 %matches, label %catch1, label %catch.fallthrough +; CHECK: catch1: +; CHECK-NEXT: %4 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live8 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc9 = trunc i64 %live8 to i16 +; CHECK-NEXT: %tpidr2.obj.live10 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc9, ptr %tpidr2.obj.live10, align 2 +; CHECK-NEXT: %tpi.int11 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int11) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @bar_shared() +; CHECK-NEXT: to label %return.sink.split unwind label %lpad3 +; CHECK: catch.fallthrough: +; CHECK-NEXT: %5 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) +; CHECK-NEXT: %matches1 = icmp eq i32 %2, %5 +; CHECK-NEXT: br i1 %matches1, label %catch2, label %eh.resume +; CHECK: catch2: +; CHECK-NEXT: %6 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live12 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc13 = trunc i64 %live12 to i16 +; CHECK-NEXT: %tpidr2.obj.live14 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc13, ptr %tpidr2.obj.live14, align 2 +; 
CHECK-NEXT: %tpi.int15 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int15) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @fizz_shared() +; CHECK-NEXT: to label %return.sink.split unwind label %lpad2 +; CHECK: lpad2: +; CHECK-NEXT: %7 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live17 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc18 = trunc i64 %live17 to i16 +; CHECK-NEXT: %tpidr2.obj.live19 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc18, ptr %tpidr2.obj.live19, align 2 +; CHECK-NEXT: %tpi.int20 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int20) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za unwind label %terminate.lpad +; CHECK: lpad3: +; CHECK-NEXT: %8 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live25 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc26 = trunc i64 %live25 to i16 +; CHECK-NEXT: %tpidr2.obj.live27 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc26, ptr %tpidr2.obj.live27, align 2 +; CHECK-NEXT: %tpi.int28 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int28) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za23 unwind label %terminate.lpad +; CHECK: return.sink.split: +; CHECK-NEXT: %retval.ph = phi i32 [ 42, %catch1 ], [ 
23, %catch2 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %return +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 42, %entry ], [ %retval.ph, %resume ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret i32 %retval +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr221 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp22 = icmp eq i64 %tpidr221, 0 +; CHECK-NEXT: br i1 %cmp22, label %restore.za16, label %eh.resume +; CHECK: restore.za16: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: check.za23: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr229 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp30 = icmp eq i64 %tpidr229, 0 +; CHECK-NEXT: br i1 %cmp30, label %restore.za24, label %eh.resume +; CHECK: restore.za24: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label 
%eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: %lpad.val13.merged = phi { ptr, i32 } [ %0, %catch.fallthrough ], [ %7, %check.za ], [ %8, %check.za23 ], [ %7, %restore.za16 ], [ %8, %restore.za24 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %lpad.val13.merged +; CHECK: terminate.lpad: +; CHECK-NEXT: %9 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %10 = extractvalue { ptr, i32 } %9, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %10) +; CHECK-NEXT: unreachable +; +entry: + invoke void @foo_shared() + to label %return unwind label %lpad1 + +lpad1: + %0 = landingpad { ptr, i32 } + catch ptr @overflow_error + catch ptr @runtime_error + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = extractvalue { ptr, i32 } %0, 1 + %3 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @overflow_error) + %matches = icmp eq i32 %2, %3 + br i1 %matches, label %catch1, label %catch.fallthrough + +catch1: + %4 = tail call ptr @__cxa_begin_catch(ptr %1) + invoke void @bar_shared() + to label %return.sink.split unwind label %lpad3 + +catch.fallthrough: + %5 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) + %matches1 = icmp eq i32 %2, %5 + br i1 %matches1, label %catch2, label %eh.resume + +catch2: + %6 = tail call ptr @__cxa_begin_catch(ptr %1) + invoke void @fizz_shared() + to label %return.sink.split unwind label %lpad2 + +lpad2: + %7 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +lpad3: + %8 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +return.sink.split: + %retval.ph = phi i32 [ 42, %catch1 ], [ 23, %catch2 ] + tail call void @__cxa_end_catch() + br label %return + +return: + %retval = phi i32 [ 42, %entry ], [ %retval.ph, %return.sink.split ] + ret i32 %retval + +eh.resume: + 
%lpad.val13.merged = phi { ptr, i32 } [ %0, %catch.fallthrough ], [ %7, %lpad2 ], [ %8, %lpad3 ] + resume { ptr, i32 } %lpad.val13.merged + +terminate.lpad: + %9 = landingpad { ptr, i32 } + catch ptr null + %10 = extractvalue { ptr, i32 } %9, 0 + tail call void @__clang_call_terminate(ptr %10) + unreachable +} + +; +; Try/Catch with throw in try block. Set up & restore lazy-save +; +; void try_throw() { +; try { +; throw 23; +; } catch (...) { +; foo(); +; } +; bar(); +; return; +; } + +define void @try_throw() "aarch64_pstate_za_shared" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@try_throw() #4 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N9 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN10 = mul i64 %N9, %N9 +; CHECK-NEXT: %buffer11 = alloca i8, i64 %NN10, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer11, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: %exception = tail call ptr @__cxa_allocate_exception(i64 4) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: 
%tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: store i32 23, ptr %exception, align 4 +; CHECK-NEXT: %live13 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc14 = trunc i64 %live13 to i16 +; CHECK-NEXT: %tpidr2.obj.live15 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc14, ptr %tpidr2.obj.live15, align 2 +; CHECK-NEXT: %tpi.int16 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int16) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) +; CHECK-NEXT: to label %check.za unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live19 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc20 = trunc i64 %live19 to i16 +; CHECK-NEXT: %tpidr2.obj.live21 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc20, ptr %tpidr2.obj.live21, align 2 +; CHECK-NEXT: %tpi.int22 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int22) +; CHECK-NEXT: call void 
@llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %invoke.cont unwind label %lpad1 +; CHECK: invoke.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live3 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc4 = trunc i64 %live3 to i16 +; CHECK-NEXT: %tpidr2.obj.live5 = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc4, ptr %tpidr2.obj.live5, align 2 +; CHECK-NEXT: %tpi.int6 = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int6) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr27 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp8 = icmp eq i64 %tpidr27, 0 +; CHECK-NEXT: br i1 %cmp8, label %restore.za2, label %resume1 +; CHECK: restore.za2: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume1 +; CHECK: resume1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: tail call void @bar_shared() +; CHECK-NEXT: ret void +; CHECK: lpad1: +; CHECK-NEXT: %3 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live25 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc26 = trunc i64 %live25 to i16 +; CHECK-NEXT: %tpidr2.obj.live27 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc26, ptr %tpidr2.obj.live27, align 2 +; CHECK-NEXT: %tpi.int28 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int28) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za23 unwind 
label %terminate.lpad +; CHECK: check.za23: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr229 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp30 = icmp eq i64 %tpidr229, 0 +; CHECK-NEXT: br i1 %cmp30, label %restore.za24, label %eh.resume +; CHECK: restore.za24: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %3 +; CHECK: terminate.lpad: +; CHECK-NEXT: %4 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %5 = extractvalue { ptr, i32 } %4, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %5) +; CHECK-NEXT: unreachable +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr217 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp18 = icmp eq i64 %tpidr217, 0 +; CHECK-NEXT: br i1 %cmp18, label %restore.za12, label %unreachable +; CHECK: restore.za12: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %unreachable +; CHECK: unreachable: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: unreachable +; +entry: + %exception = tail call ptr @__cxa_allocate_exception(i64 4) + store i32 23, ptr %exception + invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) + to label %unreachable unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + invoke void @foo_shared() + to label %invoke.cont unwind label %lpad1 + +invoke.cont: + tail call void @__cxa_end_catch() + tail call void @bar_shared() + ret void + +lpad1: + %3 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to 
label %eh.resume unwind label %terminate.lpad + +eh.resume: + resume { ptr, i32 } %3 + +terminate.lpad: + %4 = landingpad { ptr, i32 } + catch ptr null + %5 = extractvalue { ptr, i32 } %4, 0 + tail call void @__clang_call_terminate(ptr %5) + unreachable + +unreachable: + unreachable +} + +; +; Try/Catch with throw in catch block. Set up & restore lazy-save +; +; void catch_throw() { +; try { +; foo(); +; } catch (...) { +; throw 23; +; } +; bar(); +; return; +; } + +define void @catch_throw() "aarch64_pstate_za_shared" personality i32 0 { +; CHECK-LABEL: define {{[^@]+}}@catch_throw() #4 personality i32 0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live4 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc5 = trunc i64 %live4 to i16 +; CHECK-NEXT: %tpidr2.obj.live6 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc5, ptr %tpidr2.obj.live6, align 2 +; CHECK-NEXT: %tpi.int7 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int7) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void 
@llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %try.cont unwind label %lpad1 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: %exception = tail call ptr @__cxa_allocate_exception(i64 4) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: store i32 23, ptr %exception, align 4 +; CHECK-NEXT: %live9 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc10 = trunc i64 %live9 to i16 +; CHECK-NEXT: %tpidr2.obj.live11 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc10, ptr %tpidr2.obj.live11, align 2 +; CHECK-NEXT: %tpi.int12 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int12) +; CHECK-NEXT: call void 
@llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) +; CHECK-NEXT: to label %check.za unwind label %lpad2 +; CHECK: lpad2: +; CHECK-NEXT: %3 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live17 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc18 = trunc i64 %live17 to i16 +; CHECK-NEXT: %tpidr2.obj.live19 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc18, ptr %tpidr2.obj.live19, align 2 +; CHECK-NEXT: %tpi.int20 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int20) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za15 unwind label %terminate.lpad +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: tail call void @bar_shared() +; CHECK-NEXT: ret void +; CHECK: check.za15: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr221 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp22 = icmp eq i64 %tpidr221, 0 +; CHECK-NEXT: br i1 %cmp22, label %restore.za16, label %eh.resume +; CHECK: restore.za16: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %3 +; CHECK: terminate.lpad: +; CHECK-NEXT: %4 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %5 = extractvalue { ptr, i32 } %4, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %5) +; CHECK-NEXT: unreachable +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: 
%tpidr213 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp14 = icmp eq i64 %tpidr213, 0 +; CHECK-NEXT: br i1 %cmp14, label %restore.za8, label %unreachable +; CHECK: restore.za8: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %unreachable +; CHECK: unreachable: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: unreachable +; +entry: + invoke void @foo_shared() + to label %try.cont unwind label %lpad1 + +lpad1: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + %exception = tail call ptr @__cxa_allocate_exception(i64 4) + store i32 23, ptr %exception + invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) + to label %unreachable unwind label %lpad2 + +lpad2: + %3 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +try.cont: + tail call void @bar_shared() + ret void + +eh.resume: + resume { ptr, i32 } %3 + +terminate.lpad: + %4 = landingpad { ptr, i32 } + catch ptr null + %5 = extractvalue { ptr, i32 } %4, 0 + tail call void @__clang_call_terminate(ptr %5) + unreachable + +unreachable: + unreachable +} + +; +; Nested Try/Catch. Set up & restore lazy-save +; +; void nested() { +; try { +; foo(); +; try { +; bar(); +; } catch (const std::runtime_error& e) { +; fizz(); +; } +; } +; catch (...) 
{ +; buzz(); +; } +; } + +define void @nested() "aarch64_pstate_za_shared" personality i32 0 { +; CHECK-LABEL: define {{[^@]+}}@nested() #4 personality i32 0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live4 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc5 = trunc i64 %live4 to i16 +; CHECK-NEXT: %tpidr2.obj.live6 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc5, ptr %tpidr2.obj.live6, align 2 +; CHECK-NEXT: %tpi.int7 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int7) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @foo_shared() +; CHECK-NEXT: to label %invoke.cont1 unwind label %lpad1 +; CHECK: invoke.cont1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live8 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc9 = trunc i64 %live8 to i16 +; CHECK-NEXT: %tpidr2.obj.live10 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 
%live.trunc9, ptr %tpidr2.obj.live10, align 2 +; CHECK-NEXT: %tpi.int11 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int11) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @bar_shared() +; CHECK-NEXT: to label %try.cont14 unwind label %lpad2 +; CHECK: lpad1: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: br label %catch2 +; CHECK: lpad2: +; CHECK-NEXT: %2 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr @runtime_error +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %3 = extractvalue { ptr, i32 } %2, 0 +; CHECK-NEXT: %4 = extractvalue { ptr, i32 } %2, 1 +; CHECK-NEXT: %5 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) +; CHECK-NEXT: %matches = icmp eq i32 %4, %5 +; CHECK-NEXT: br i1 %matches, label %catch1, label %catch2 +; CHECK: catch1: +; CHECK-NEXT: %6 = tail call ptr @__cxa_begin_catch(ptr %3) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live12 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc13 = trunc i64 %live12 to i16 +; CHECK-NEXT: %tpidr2.obj.live14 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc13, ptr %tpidr2.obj.live14, align 2 +; CHECK-NEXT: %tpi.int15 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int15) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @fizz_shared() +; CHECK-NEXT: to label %invoke.cont2 unwind label %lpad3 +; 
CHECK: invoke.cont2: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live17 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc18 = trunc i64 %live17 to i16 +; CHECK-NEXT: %tpidr2.obj.live19 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc18, ptr %tpidr2.obj.live19, align 2 +; CHECK-NEXT: %tpi.int20 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int20) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za unwind label %lpad4 +; CHECK: lpad3: +; CHECK-NEXT: %7 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %live25 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc26 = trunc i64 %live25 to i16 +; CHECK-NEXT: %tpidr2.obj.live27 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc26, ptr %tpidr2.obj.live27, align 2 +; CHECK-NEXT: %tpi.int28 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int28) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za23 unwind label %terminate.lpad +; CHECK: lpad4: +; CHECK-NEXT: %8 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: br label %eh.cleanup +; CHECK: check.za23: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr229 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp30 = icmp eq i64 %tpidr229, 0 +; CHECK-NEXT: br i1 %cmp30, label %restore.za24, label %eh.cleanup +; CHECK: restore.za24: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; 
CHECK-NEXT: br label %eh.cleanup +; CHECK: eh.cleanup: +; CHECK-NEXT: %pn = phi { ptr, i32 } [ %8, %lpad4 ], [ %7, %check.za23 ], [ %7, %restore.za24 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %exn.slot.0 = extractvalue { ptr, i32 } %pn, 0 +; CHECK-NEXT: br label %catch2 +; CHECK: catch2: +; CHECK-NEXT: %exn.slot.1 = phi ptr [ %exn.slot.0, %eh.cleanup ], [ %3, %lpad2 ], [ %1, %lpad1 ] +; CHECK-NEXT: %9 = tail call ptr @__cxa_begin_catch(ptr %exn.slot.1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live31 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc32 = trunc i64 %live31 to i16 +; CHECK-NEXT: %tpidr2.obj.live33 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc32, ptr %tpidr2.obj.live33, align 2 +; CHECK-NEXT: %tpi.int34 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int34) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @buzz_shared() +; CHECK-NEXT: to label %invoke.cont3 unwind label %lpad5 +; CHECK: invoke.cont3: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 
@llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %try.cont14 +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr221 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp22 = icmp eq i64 %tpidr221, 0 +; CHECK-NEXT: br i1 %cmp22, label %restore.za16, label %try.cont14 +; CHECK: restore.za16: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %try.cont14 +; CHECK: try.cont14: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; CHECK: lpad5: +; CHECK-NEXT: %10 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live37 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc38 = trunc i64 %live37 to i16 +; CHECK-NEXT: %tpidr2.obj.live39 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc38, ptr %tpidr2.obj.live39, align 2 +; CHECK-NEXT: %tpi.int40 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int40) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za35 unwind label %terminate.lpad +; CHECK: check.za35: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr241 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp42 = icmp eq i64 %tpidr241, 0 +; CHECK-NEXT: br i1 %cmp42, label %restore.za36, label %eh.resume +; CHECK: restore.za36: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr 
%tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %10 +; CHECK: terminate.lpad: +; CHECK-NEXT: %11 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %12 = extractvalue { ptr, i32 } %11, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %12) +; CHECK-NEXT: unreachable +; +entry: + invoke void @foo_shared() + to label %invoke.cont1 unwind label %lpad1 + +invoke.cont1: + invoke void @bar_shared() + to label %try.cont14 unwind label %lpad2 + +lpad1: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + br label %catch2 + +lpad2: + %2 = landingpad { ptr, i32 } + catch ptr @runtime_error + catch ptr null + %3 = extractvalue { ptr, i32 } %2, 0 + %4 = extractvalue { ptr, i32 } %2, 1 + %5 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) + %matches = icmp eq i32 %4, %5 + br i1 %matches, label %catch1, label %catch2 + +catch1: + %6 = tail call ptr @__cxa_begin_catch(ptr %3) + invoke void @fizz_shared() + to label %invoke.cont2 unwind label %lpad3 + +invoke.cont2: + invoke void @__cxa_end_catch() + to label %try.cont14 unwind label %lpad4 + +lpad3: + %7 = landingpad { ptr, i32 } + catch ptr null + invoke void @__cxa_end_catch() + to label %eh.cleanup unwind label %terminate.lpad + +lpad4: + %8 = landingpad { ptr, i32 } + catch ptr null + br label %eh.cleanup + +eh.cleanup: + %pn = phi { ptr, i32 } [ %8, %lpad4 ], [ %7, %lpad3 ] + %exn.slot.0 = extractvalue { ptr, i32 } %pn, 0 + br label %catch2 + +catch2: + %exn.slot.1 = phi ptr [ %exn.slot.0, %eh.cleanup ], [ %3, %lpad2 ], [ %1, %lpad1 ] + %9 = tail call ptr @__cxa_begin_catch(ptr %exn.slot.1) + invoke void @buzz_shared() + to label %invoke.cont3 unwind label %lpad5 + +invoke.cont3: + tail call void @__cxa_end_catch() + br label %try.cont14 + +try.cont14: + ret void + 
+lpad5: + %10 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + resume { ptr, i32 } %10 + +terminate.lpad: + %11 = landingpad { ptr, i32 } + catch ptr null + %12 = extractvalue { ptr, i32 } %11, 0 + tail call void @__clang_call_terminate(ptr %12) + unreachable +} + +; void try_condition(bool cond) { +; try { +; if (cond) { +; throw 23; +; } +; bar(); +; } +; catch (...) { +; fizz(); +; } +; } + +define void @conditional_throw(i1 %cond) "aarch64_pstate_za_shared" personality i32 0 { +; CHECK-LABEL: define {{[^@]+}}@conditional_throw(i1 %cond) #4 personality i32 0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N9 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN10 = mul i64 %N9, %N9 +; CHECK-NEXT: %buffer11 = alloca i8, i64 %NN10, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer11, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: br i1 %cond, label %if.then, label %if.end +; CHECK: if.then: +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: %exception = tail call ptr 
@__cxa_allocate_exception(i64 4) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: store i32 23, ptr %exception, align 16 +; CHECK-NEXT: %live13 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc14 = trunc i64 %live13 to i16 +; CHECK-NEXT: %tpidr2.obj.live15 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc14, ptr %tpidr2.obj.live15, align 2 +; CHECK-NEXT: %tpi.int16 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int16) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) +; CHECK-NEXT: to label %check.za unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %2 = tail call ptr @__cxa_begin_catch(ptr %1) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live19 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc20 = trunc i64 %live19 to i16 +; CHECK-NEXT: %tpidr2.obj.live21 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc20, ptr %tpidr2.obj.live21, align 2 +; CHECK-NEXT: %tpi.int22 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void 
@llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int22) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @fizz_shared() +; CHECK-NEXT: to label %invoke.cont2 unwind label %lpad1 +; CHECK: invoke.cont2: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live3 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc4 = trunc i64 %live3 to i16 +; CHECK-NEXT: %tpidr2.obj.live5 = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc4, ptr %tpidr2.obj.live5, align 2 +; CHECK-NEXT: %tpi.int6 = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int6) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr27 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp8 = icmp eq i64 %tpidr27, 0 +; CHECK-NEXT: br i1 %cmp8, label %restore.za2, label %resume1 +; CHECK: restore.za2: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume1 +; CHECK: resume1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %try.cont +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; CHECK: if.end: +; CHECK-NEXT: %live23 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc24 = trunc i64 %live23 to i16 +; CHECK-NEXT: %tpidr2.obj.live25 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc24, ptr %tpidr2.obj.live25, align 2 +; CHECK-NEXT: %tpi.int26 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int26) +; 
CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @bar_shared() +; CHECK-NEXT: to label %try.cont unwind label %lpad +; CHECK: lpad1: +; CHECK-NEXT: %3 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live29 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc30 = trunc i64 %live29 to i16 +; CHECK-NEXT: %tpidr2.obj.live31 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc30, ptr %tpidr2.obj.live31, align 2 +; CHECK-NEXT: %tpi.int32 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int32) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za27 unwind label %terminate.lpad +; CHECK: check.za27: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr233 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp34 = icmp eq i64 %tpidr233, 0 +; CHECK-NEXT: br i1 %cmp34, label %restore.za28, label %eh.resume +; CHECK: restore.za28: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %3 +; CHECK: terminate.lpad: +; CHECK-NEXT: %4 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %5 = extractvalue { ptr, i32 } %4, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %5) +; CHECK-NEXT: unreachable +; CHECK: check.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr217 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp18 = icmp eq i64 %tpidr217, 0 +; CHECK-NEXT: br 
i1 %cmp18, label %restore.za12, label %unreachable +; CHECK: restore.za12: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %unreachable +; CHECK: unreachable: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: unreachable +; +entry: + br i1 %cond, label %if.then, label %if.end + +if.then: + %exception = tail call ptr @__cxa_allocate_exception(i64 4) + store i32 23, ptr %exception, align 16 + invoke void @__cxa_throw(ptr nonnull %exception, ptr nonnull @except, ptr null) + to label %unreachable unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr null + %1 = extractvalue { ptr, i32 } %0, 0 + %2 = tail call ptr @__cxa_begin_catch(ptr %1) + invoke void @fizz_shared() + to label %invoke.cont2 unwind label %lpad1 + +invoke.cont2: + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void + +if.end: + invoke void @bar_shared() + to label %try.cont unwind label %lpad + +lpad1: + %3 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + resume { ptr, i32 } %3 + +terminate.lpad: + %4 = landingpad { ptr, i32 } + catch ptr null + %5 = extractvalue { ptr, i32 } %4, 0 + tail call void @__clang_call_terminate(ptr %5) + unreachable + +unreachable: + unreachable +} + +; +; int foo_throw(bool a) throw(const char *) { +; if (a) { +; bar(); +; return 42; +; } +; fizz(); +; return 23; +; } +; + +define i32 @throw_in_signature(i1 %a) "aarch64_pstate_za_shared" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@throw_in_signature(i1 %a) #4 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, 
i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: br i1 %a, label %if.then, label %if.end +; CHECK: if.then: +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @bar_shared() +; CHECK-NEXT: to label %return unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: filter [1 x ptr] [ptr @except] +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 1 +; CHECK-NEXT: %ehspec.fails = icmp slt i32 %1, 0 +; CHECK-NEXT: br i1 %ehspec.fails, label %ehspec.unexpected, label %eh.resume +; CHECK: ehspec.unexpected: +; CHECK-NEXT: %2 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: tail call void @__cxa_call_unexpected(ptr %2) +; CHECK-NEXT: unreachable +; CHECK: if.end: +; CHECK-NEXT: %live1 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc2 = trunc i64 %live1 to i16 +; CHECK-NEXT: %tpidr2.obj.live3 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc2, ptr %tpidr2.obj.live3, align 2 +; CHECK-NEXT: %tpi.int4 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int4) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @fizz_shared() +; CHECK-NEXT: to label %return 
unwind label %lpad +; CHECK: return: +; CHECK-NEXT: %retval = phi i32 [ 42, %if.then ], [ 23, %if.end ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret i32 %retval +; CHECK: eh.resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %0 +; +entry: + br i1 %a, label %if.then, label %if.end + +if.then: + invoke void @bar_shared() + to label %return unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + filter [1 x ptr] [ptr @except] + %1 = extractvalue { ptr, i32 } %0, 1 + %ehspec.fails = icmp slt i32 %1, 0 + br i1 %ehspec.fails, label %ehspec.unexpected, label %eh.resume + +ehspec.unexpected: + %2 = extractvalue { ptr, i32 } %0, 0 + tail call void @__cxa_call_unexpected(ptr %2) #2 + unreachable + +if.end: + invoke void @fizz_shared() + to label %return unwind label %lpad + +return: + %retval = phi i32 [ 42, %if.then ], [ 23, %if.end ] + ret i32 %retval + +eh.resume: + resume { ptr, i32 } %0 +} + +; +; void try_func() try { +; fizz(); +; } +; catch (const std::runtime_error& e) { +; buzz(); +; } +; + +define void @try_func() "aarch64_pstate_za_shared" personality i32 1 { +; CHECK-LABEL: define {{[^@]+}}@try_func() #4 personality i32 1 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %tpidr2.invoke.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N1 = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN2 = mul i64 %N1, %N1 +; CHECK-NEXT: %buffer3 = alloca i8, i64 %NN2, align 16 +; CHECK-NEXT: %tpidr2.invoke.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer3, ptr %tpidr2.invoke.obj.buffer, align 8 +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr 
%tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live4 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc5 = trunc i64 %live4 to i16 +; CHECK-NEXT: %tpidr2.obj.live6 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc5, ptr %tpidr2.obj.live6, align 2 +; CHECK-NEXT: %tpi.int7 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int7) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void @fizz_shared() +; CHECK-NEXT: to label %try.cont unwind label %lpad +; CHECK: lpad: +; CHECK-NEXT: %0 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr @runtime_error +; CHECK-NEXT: %1 = extractvalue { ptr, i32 } %0, 1 +; CHECK-NEXT: %2 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) +; CHECK-NEXT: %matches = icmp eq i32 %1, %2 +; CHECK-NEXT: br i1 %matches, label %catch, label %eh.resume +; CHECK: catch: +; CHECK-NEXT: %3 = extractvalue { ptr, i32 } %0, 0 +; CHECK-NEXT: %4 = tail call ptr @__cxa_begin_catch(ptr %3) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: %live8 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc9 = trunc i64 %live8 to i16 +; CHECK-NEXT: %tpidr2.obj.live10 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc9, ptr %tpidr2.obj.live10, align 2 +; CHECK-NEXT: %tpi.int11 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int11) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: invoke void 
@buzz_shared() +; CHECK-NEXT: to label %invoke.cont unwind label %lpad1 +; CHECK: invoke.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: tail call void @__cxa_end_catch() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: br label %try.cont +; CHECK: try.cont: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; CHECK: lpad1: +; CHECK-NEXT: %5 = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: %live13 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc14 = trunc i64 %live13 to i16 +; CHECK-NEXT: %tpidr2.obj.live15 = getelementptr %tpidr2_ty, ptr %tpidr2.invoke.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc14, ptr %tpidr2.obj.live15, align 2 +; CHECK-NEXT: %tpi.int16 = ptrtoint ptr %tpidr2.invoke.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int16) +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.save() +; CHECK-NEXT: invoke void @__cxa_end_catch() +; CHECK-NEXT: to label %check.za unwind label %terminate.lpad +; CHECK: check.za: +; CHECK-NEXT: call void 
@llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr217 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp18 = icmp eq i64 %tpidr217, 0 +; CHECK-NEXT: br i1 %cmp18, label %restore.za12, label %eh.resume +; CHECK: restore.za12: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.invoke.obj) +; CHECK-NEXT: br label %eh.resume +; CHECK: eh.resume: +; CHECK-NEXT: %lpad.val6.merged = phi { ptr, i32 } [ %0, %lpad ], [ %5, %check.za ], [ %5, %restore.za12 ] +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: call void @llvm.aarch64.sme.stop.pstateza() +; CHECK-NEXT: resume { ptr, i32 } %lpad.val6.merged +; CHECK: terminate.lpad: +; CHECK-NEXT: %6 = landingpad { ptr, i32 } +; CHECK-NEXT: catch ptr null +; CHECK-NEXT: %7 = extractvalue { ptr, i32 } %6, 0 +; CHECK-NEXT: tail call void @__clang_call_terminate(ptr %7) +; CHECK-NEXT: unreachable +; +entry: + invoke void @fizz_shared() + to label %try.cont unwind label %lpad + +lpad: + %0 = landingpad { ptr, i32 } + catch ptr @runtime_error + %1 = extractvalue { ptr, i32 } %0, 1 + %2 = tail call i32 @llvm.eh.typeid.for(ptr nonnull @runtime_error) + %matches = icmp eq i32 %1, %2 + br i1 %matches, label %catch, label %eh.resume + +catch: + %3 = extractvalue { ptr, i32 } %0, 0 + %4 = tail call ptr @__cxa_begin_catch(ptr %3) #4 + invoke void @buzz_shared() + to label %invoke.cont unwind label %lpad1 + +invoke.cont: + tail call void @__cxa_end_catch() + br label %try.cont + +try.cont: + ret void + +lpad1: + %5 = landingpad { ptr, i32 } + cleanup + invoke void @__cxa_end_catch() + to label %eh.resume unwind label %terminate.lpad + +eh.resume: + %lpad.val6.merged = phi { ptr, i32 } [ %0, %lpad ], [ %5, %lpad1 ] + resume { ptr, i32 } %lpad.val6.merged + +terminate.lpad: + %6 = landingpad { ptr, i32 } + catch ptr null + %7 = extractvalue { ptr, i32 } %6, 0 + tail call void @__clang_call_terminate(ptr %7) + unreachable +} + +declare i32 @llvm.eh.typeid.for(ptr) +declare ptr 
@__cxa_begin_catch(ptr) +declare void @__cxa_end_catch() +declare ptr @__cxa_allocate_exception(i64) +declare void @__cxa_throw(ptr, ptr, ptr) +declare void @__cxa_call_unexpected(ptr) +declare void @__clang_call_terminate(ptr) + +;. +; CHECK: attributes #0 = { "aarch64_pstate_za_shared" } +; CHECK: attributes #1 = { "aarch64_pstate_za_new" } +; CHECK: attributes #2 = { "aarch64_expanded_pstate_za" "aarch64_pstate_sm_enabled" "aarch64_pstate_za_shared" } +; CHECK: attributes #3 = { "aarch64_expanded_pstate_za" "aarch64_pstate_sm_compatible" "aarch64_pstate_za_shared" } +; CHECK: attributes #4 = { "aarch64_expanded_pstate_za" "aarch64_pstate_za_shared" } +; CHECK: attributes #5 = { "aarch64_expanded_pstate_za" "aarch64_pstate_za_new" } +; CHECK: attributes #6 = { nounwind readnone } +; CHECK: attributes #7 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #8 = { nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #9 = { inaccessiblememonly nocallback nofree nosync nounwind readonly willreturn } +;. 
diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-add.ll @@ -6,7 +6,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: addha za0.s, p0/m, p1/m, z0.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, %pn, %pm, %zn) + call void @llvm.aarch64.sme.addha.nxv4i32(i32 0, %pn, %pm, %zn) ret void } @@ -15,7 +15,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: addva za3.s, p0/m, p1/m, z0.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, %pn, %pm, %zn) + call void @llvm.aarch64.sme.addva.nxv4i32(i32 3, %pn, %pm, %zn) ret void } @@ -24,7 +24,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: addha za0.d, p0/m, p1/m, z0.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, %pn, %pm, %zn) + call void @llvm.aarch64.sme.addha.nxv2i64(i32 0, %pn, %pm, %zn) ret void } @@ -33,11 +33,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: addva za7.d, p0/m, p1/m, z0.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, %pn, %pm, %zn) + call void @llvm.aarch64.sme.addva.nxv2i64(i32 7, %pn, %pm, %zn) ret void } -declare void @llvm.aarch64.sme.addha.nxv4i32(i64, , , ) -declare void @llvm.aarch64.sme.addha.nxv2i64(i64, , , ) -declare void @llvm.aarch64.sme.addva.nxv4i32(i64, , , ) -declare void @llvm.aarch64.sme.addva.nxv2i64(i64, , , ) +declare void @llvm.aarch64.sme.addha.nxv4i32(i32, , , ) +declare void @llvm.aarch64.sme.addha.nxv2i64(i32, , , ) +declare void @llvm.aarch64.sme.addva.nxv4i32(i32, , , ) +declare void @llvm.aarch64.sme.addva.nxv2i64(i32, , , ) diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-get-live-za-slices.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-get-live-za-slices.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-get-live-za-slices.ll @@ -0,0 +1,13 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +define i64 @sme_get_live_slices() { +; CHECK-LABEL: sme_get_live_slices: +; CHECK: // %bb.0: +; CHECK-NEXT: rdsvl x0, #1 +; CHECK-NEXT: ret + %slices = call i64 @llvm.aarch64.sme.get.live.za.slices() + ret i64 %slices +} + +declare i64 @llvm.aarch64.sme.get.live.za.slices() diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: ld1b {za0v.b[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %ptr, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %ptr, i32 0, i32 0) ret void; } @@ -25,8 +25,8 @@ ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1b.horiz( %pg, ptr %base, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1b.vert( %pg, ptr %base, i32 0, i32 %tileslice) ret void; } @@ -41,10 +41,10 @@ ; CHECK-NEXT: ld1h {za1v.h[w12, 7]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i64 1, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %ptr, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, 
ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %ptr, i32 1, i32 %tileslice) ret void; } @@ -58,8 +58,8 @@ ; CHECK-NEXT: ret %base = getelementptr i16, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %base, i64 1, i32 0) + call void @llvm.aarch64.sme.ld1h.horiz( %pg, ptr %base, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1h.vert( %pg, ptr %base, i32 1, i32 0) ret void; } @@ -78,14 +78,14 @@ ; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i64 3, i32 %tileslice) - call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 2, i32 %tileslice) - call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i32 3, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i32 2, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %ptr, i32 3, i32 0) ret void; } @@ -99,8 +99,8 @@ ; CHECK-NEXT: ret %base = getelementptr i32, ptr %ptr, i64 %index 
%tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %base, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %base, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1w.vert( %pg, ptr %base, i32 3, i32 %tileslice) ret void; } @@ -127,22 +127,22 @@ ; CHECK-NEXT: ld1d {za7v.d[w12, 1]}, p0/z, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 4, i32 %tileslice) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i64 7, i32 %tileslice) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 4, i32 %tileslice) + call void 
@llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 4, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %ptr, i32 7, i32 %tileslice) ret void; } @@ -156,8 +156,8 @@ ; CHECK-NEXT: ret %base = getelementptr i64, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %base, i64 7, i32 0) + call void @llvm.aarch64.sme.ld1d.horiz( %pg, ptr %base, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.ld1d.vert( %pg, ptr %base, i32 7, i32 0) ret void; } @@ -198,38 +198,38 @@ ; CHECK-NEXT: ld1q {za14v.q[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 8, i32 0) - 
call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i64 15, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 4, i32 0) + call void 
@llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 8, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 9, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 10, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 11, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 12, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 13, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 14, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %ptr, i32 15, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 4, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 8, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 9, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 10, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 11, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 12, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 13, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 14, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %ptr, i32 15, i32 0) ret void; } @@ -241,8 +241,8 @@ ; CHECK-NEXT: ld1q {za15v.q[w12, 0]}, p0/z, [x0, x1, lsl #4] ; 
CHECK-NEXT: ret %base = getelementptr i128, ptr %ptr, i64 %index - call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %base, i64 15, i32 0) + call void @llvm.aarch64.sme.ld1q.horiz( %pg, ptr %base, i32 0, i32 0) + call void @llvm.aarch64.sme.ld1q.vert( %pg, ptr %base, i32 15, i32 0) ret void; } @@ -317,9 +317,9 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i64 0, i32 %base) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i64 0, i32 %add1) - call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i64 0, i32 %add2) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i32 0, i32 %base) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i32 0, i32 %add1) + call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %src, i32 0, i32 %add2) %inc = add nuw nsw i32 %i, 1 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %exit, label %for.body @@ -329,16 +329,16 @@ } -declare void @llvm.aarch64.sme.ld1b.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1h.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1w.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1d.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1q.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1b.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1h.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1w.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1d.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.ld1q.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.ld1b.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1h.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1w.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1d.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1q.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1b.vert(, ptr, i32, i32) +declare 
void @llvm.aarch64.sme.ld1h.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1w.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1d.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.ld1q.vert(, ptr, i32, i32) declare void @llvm.aarch64.sme.ldr(i32, ptr) declare i64 @llvm.vscale.i64() diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mopa.ll @@ -6,7 +6,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bfmopa za0.s, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -15,7 +15,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmopa za1.s, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32 1, %pn, %pm, %zn, %zm) ret void } @@ -24,7 +24,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: smopa za2.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -33,7 +33,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: smopa za0.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -42,7 +42,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umopa za3.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 3, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32 3, %pn, %pm, %zn, %zm) ret void } @@ -51,7 +51,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umopa za1.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void 
@llvm.aarch64.sme.umopa.wide.nxv8i16(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32 1, %pn, %pm, %zn, %zm) ret void } @@ -60,7 +60,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmopa za0.s, p0/m, p1/m, z0.s, z1.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mopa.nxv4f32(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mopa.nxv4f32(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -69,7 +69,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmopa za2.d, p0/m, p1/m, z0.d, z1.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mopa.nxv2f64(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mopa.nxv2f64(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -78,7 +78,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sumopa za1.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32 1, %pn, %pm, %zn, %zm) ret void } @@ -87,7 +87,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sumopa za3.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 3, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32 3, %pn, %pm, %zn, %zm) ret void } @@ -96,7 +96,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: usmopa za2.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -105,22 +105,22 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: usmopa za7.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 7, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32 7, %pn, %pm, %zn, %zm) ret void } attributes #0 = { "target-features"="+sme-i16i64" } attributes #1 = { "target-features"="+sme-f64f64" } -declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64, , , , ) -declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64, , , , ) -declare void 
@llvm.aarch64.sme.mopa.nxv4f32(i64, , , , ) -declare void @llvm.aarch64.sme.mopa.nxv2f64(i64, , , , ) -declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64, , , , ) +declare void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.mopa.wide.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.mopa.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.mopa.nxv2f64(i32, , , , ) +declare void @llvm.aarch64.sme.smopa.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.smopa.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.umopa.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.umopa.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i32, , , , ) diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mops.ll @@ -6,7 +6,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bfmops za0.s, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -15,7 +15,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmops za1.s, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void 
@llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mops.wide.nxv8f16(i32 1, %pn, %pm, %zn, %zm) ret void } @@ -24,7 +24,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: smops za2.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.smops.wide.nxv16i8(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -33,7 +33,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: smops za0.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.smops.wide.nxv8i16(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -42,7 +42,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umops za3.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 3, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.umops.wide.nxv16i8(i32 3, %pn, %pm, %zn, %zm) ret void } @@ -51,7 +51,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umops za1.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.umops.wide.nxv8i16(i32 1, %pn, %pm, %zn, %zm) ret void } @@ -60,7 +60,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmops za0.s, p0/m, p1/m, z0.s, z1.s ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mops.nxv4f32(i64 0, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mops.nxv4f32(i32 0, %pn, %pm, %zn, %zm) ret void } @@ -69,7 +69,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmops za2.d, p0/m, p1/m, z0.d, z1.d ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.mops.nxv2f64(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.mops.nxv2f64(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -78,7 +78,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sumops za1.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 1, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32 1, %pn, %pm, %zn, %zm) ret 
void } @@ -87,7 +87,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sumops za3.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 3, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32 3, %pn, %pm, %zn, %zm) ret void } @@ -96,7 +96,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: usmops za2.s, p0/m, p1/m, z0.b, z1.b ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 2, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32 2, %pn, %pm, %zn, %zm) ret void } @@ -105,22 +105,22 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: usmops za7.d, p0/m, p1/m, z0.h, z1.h ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 7, %pn, %pm, %zn, %zm) + call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32 7, %pn, %pm, %zn, %zm) ret void } attributes #0 = { "target-features"="+sme-i16i64" } attributes #1 = { "target-features"="+sme-f64f64" } -declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64, , , , ) -declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i64, , , , ) -declare void @llvm.aarch64.sme.mops.nxv4f32(i64, , , , ) -declare void @llvm.aarch64.sme.mops.nxv2f64(i64, , , , ) -declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64, , , , ) -declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64, , , , ) -declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64, , , , ) +declare void @llvm.aarch64.sme.mops.wide.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.mops.wide.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.mops.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.mops.nxv2f64(i32, , , , ) +declare void @llvm.aarch64.sme.smops.wide.nxv16i8(i32, , , , ) 
+declare void @llvm.aarch64.sme.smops.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.umops.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.umops.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.sumops.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.sumops.wide.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.usmops.wide.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.usmops.wide.nxv8i16(i32, , , , ) diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll @@ -22,21 +22,21 @@ ; CHECK-NEXT: mov z0.b, p0/m, za0h.b[w12, 14] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice) %tileslice.2 = add i32 %tileslice, 2 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.2) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.2) %tileslice.4 = add i32 %tileslice, 4 - %z2 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.4) + %z2 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.4) %tileslice.6 = add i32 %tileslice, 6 - %z3 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.6) + %z3 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.6) %tileslice.8 = add i32 %tileslice, 8 - %z4 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.8) + %z4 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.8) %tileslice.10 = add i32 %tileslice, 10 - %z5 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.10) + %z5 = call 
@llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.10) %tileslice.12 = add i32 %tileslice, 12 - %z6 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.12) + %z6 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.12) %tileslice.14 = add i32 %tileslice, 14 - %z7 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.14) + %z7 = call @llvm.aarch64.sme.read.horiz.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.14) ret %z0 } @@ -62,21 +62,21 @@ ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret %tileslice.1 = add i32 %tileslice, 1 - %z0 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.1) + %z0 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.1) %tileslice.3 = add i32 %tileslice, 3 - %z1 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.3) + %z1 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.3) %tileslice.5 = add i32 %tileslice, 5 - %z2 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.5) + %z2 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.5) %tileslice.7 = add i32 %tileslice, 7 - %z3 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.7) + %z3 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.7) %tileslice.9 = add i32 %tileslice, 9 - %z4 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.9) + %z4 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.9) %tileslice.11 = add i32 %tileslice, 11 - %z5 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.11) + %z5 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.11) %tileslice.13 = add i32 %tileslice, 13 - %z6 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.13) + %z6 = call @llvm.aarch64.sme.read.vert.nxv16i8( 
%zd, %pg, i32 0, i32 %tileslice.13) %tileslice.15 = add i32 %tileslice, 15 - %z7 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i64 0, i32 %tileslice.15) + %z7 = call @llvm.aarch64.sme.read.vert.nxv16i8( %zd, %pg, i32 0, i32 %tileslice.15) ret %z0 } @@ -93,13 +93,13 @@ ; CHECK-NEXT: mov z0.h, p0/m, za0h.h[w12, 6] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i32 0, i32 %tileslice) %tileslice.2 = add i32 %tileslice, 2 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i64 0, i32 %tileslice.2) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i32 0, i32 %tileslice.2) %tileslice.4 = add i32 %tileslice, 4 - %z2 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i64 0, i32 %tileslice.4) + %z2 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i32 0, i32 %tileslice.4) %tileslice.6 = add i32 %tileslice, 6 - %z3 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i64 0, i32 %tileslice.6) + %z3 = call @llvm.aarch64.sme.read.horiz.nxv8i16( %zd, %pg, i32 0, i32 %tileslice.6) ret %z0 } @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret %tileslice.1 = add i32 %tileslice, 1 - %z0 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i64 1, i32 %tileslice.1) + %z0 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i32 1, i32 %tileslice.1) %tileslice.3 = add i32 %tileslice, 3 - %z1 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i64 1, i32 %tileslice.3) + %z1 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i32 1, i32 %tileslice.3) %tileslice.5 = add i32 %tileslice, 5 - %z2 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i64 1, i32 %tileslice.5) + %z2 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i32 1, i32 %tileslice.5) %tileslice.7 = add i32 %tileslice, 7 - %z3 = call @llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i64 1, i32 %tileslice.7) + %z3 = call 
@llvm.aarch64.sme.read.vert.nxv8i16( %zd, %pg, i32 1, i32 %tileslice.7) ret %z0 } @@ -148,21 +148,21 @@ ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i32 0, i32 %tileslice) %tileslice.1 = add i32 %tileslice, 1 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.1) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.1) %tileslice.2 = add i32 %tileslice, 2 - %z2 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.2) + %z2 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.2) %tileslice.3 = add i32 %tileslice, 3 - %z3 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.3) + %z3 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.3) %tileslice.4 = add i32 %tileslice, 4 - %z4 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.4) + %z4 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.4) %tileslice.5 = add i32 %tileslice, 5 - %z5 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.5) + %z5 = call @llvm.aarch64.sme.read.horiz.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.5) %tileslice.6 = add i32 %tileslice, 6 - %z6 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.6) + %z6 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.6) %tileslice.7 = add i32 %tileslice, 7 - %z7 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i64 0, i32 %tileslice.7) + %z7 = call @llvm.aarch64.sme.read.vert.nxv8f16( %zd, %pg, i32 0, i32 %tileslice.7) ret %z0 } @@ -187,21 +187,21 @@ ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, 
%pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice) %tileslice.1 = add i32 %tileslice, 1 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.1) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.1) %tileslice.2 = add i32 %tileslice, 2 - %z2 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.2) + %z2 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.2) %tileslice.3 = add i32 %tileslice, 3 - %z3 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.3) + %z3 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.3) %tileslice.4 = add i32 %tileslice, 4 - %z4 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.4) + %z4 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.4) %tileslice.5 = add i32 %tileslice, 5 - %z5 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.5) + %z5 = call @llvm.aarch64.sme.read.horiz.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.5) %tileslice.6 = add i32 %tileslice, 6 - %z6 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.6) + %z6 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.6) %tileslice.7 = add i32 %tileslice, 7 - %z7 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i64 0, i32 %tileslice.7) + %z7 = call @llvm.aarch64.sme.read.vert.nxv8bf16( %zd, %pg, i32 0, i32 %tileslice.7) ret %z0 } @@ -214,9 +214,9 @@ ; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 2] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( %zd, %pg, i32 0, i32 %tileslice) %tileslice.2 = add i32 %tileslice, 2 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( %zd, %pg, i64 0, i32 %tileslice.2) + 
%z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( %zd, %pg, i32 0, i32 %tileslice.2) ret %z0 } @@ -230,9 +230,9 @@ ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret %tileslice.1 = add i32 %tileslice, 1 - %z0 = call @llvm.aarch64.sme.read.vert.nxv4i32( %zd, %pg, i64 3, i32 %tileslice.1) + %z0 = call @llvm.aarch64.sme.read.vert.nxv4i32( %zd, %pg, i32 3, i32 %tileslice.1) %tileslice.3 = add i32 %tileslice, 3 - %z1 = call @llvm.aarch64.sme.read.vert.nxv4i32( %zd, %pg, i64 3, i32 %tileslice.3) + %z1 = call @llvm.aarch64.sme.read.vert.nxv4i32( %zd, %pg, i32 3, i32 %tileslice.3) ret %z0 } @@ -249,13 +249,13 @@ ; CHECK-NEXT: mov z0.s, p0/m, za0v.s[w12, 3] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv4f32( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv4f32( %zd, %pg, i32 0, i32 %tileslice) %tileslice.1 = add i32 %tileslice, 1 - %z1 = call @llvm.aarch64.sme.read.horiz.nxv4f32( %zd, %pg, i64 0, i32 %tileslice.1) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv4f32( %zd, %pg, i32 0, i32 %tileslice.1) %tileslice.2 = add i32 %tileslice, 2 - %z2 = call @llvm.aarch64.sme.read.vert.nxv4f32( %zd, %pg, i64 0, i32 %tileslice.2) + %z2 = call @llvm.aarch64.sme.read.vert.nxv4f32( %zd, %pg, i32 0, i32 %tileslice.2) %tileslice.3 = add i32 %tileslice, 3 - %z3 = call @llvm.aarch64.sme.read.vert.nxv4f32( %zd, %pg, i64 0, i32 %tileslice.3) + %z3 = call @llvm.aarch64.sme.read.vert.nxv4f32( %zd, %pg, i32 0, i32 %tileslice.3) ret %z0 } @@ -265,7 +265,7 @@ ; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z0.d, p0/m, za0h.d[w12, 0] ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv2i64( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv2i64( %zd, %pg, i32 0, i32 %tileslice) ret %z0 } @@ -276,7 +276,7 @@ ; CHECK-NEXT: mov z0.d, p0/m, za1v.d[w12, 1] ; CHECK-NEXT: ret %tileslice.1 = add i32 %tileslice, 1 - %z0 = call @llvm.aarch64.sme.read.vert.nxv2i64( %zd, %pg, i64 1, i32 %tileslice.1) + %z0 = 
call @llvm.aarch64.sme.read.vert.nxv2i64( %zd, %pg, i32 1, i32 %tileslice.1) ret %z0 } @@ -289,9 +289,9 @@ ; CHECK-NEXT: mov z0.d, p0/m, za0v.d[w12, 1] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret - %z0 = call @llvm.aarch64.sme.read.horiz.nxv2f64( %zd, %pg, i64 0, i32 %tileslice) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv2f64( %zd, %pg, i32 0, i32 %tileslice) %tileslice.1 = add i32 %tileslice, 1 - %z1 = call @llvm.aarch64.sme.read.vert.nxv2f64( %zd, %pg, i64 0, i32 %tileslice.1) + %z1 = call @llvm.aarch64.sme.read.vert.nxv2f64( %zd, %pg, i32 0, i32 %tileslice.1) ret %z0 } @@ -301,7 +301,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv16i8( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv16i8( %zd, %pg, i32 0, i32 0) ret %res } @@ -311,7 +311,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv8i16( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv8i16( %zd, %pg, i32 0, i32 0) ret %res } @@ -321,7 +321,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv8f16( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv8f16( %zd, %pg, i32 0, i32 0) ret %res } @@ -331,7 +331,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv4i32( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv4i32( %zd, %pg, i32 0, i32 0) ret %res } @@ -341,7 +341,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv4f32( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv4f32( %zd, %pg, i32 0, i32 0) ret %res } @@ -351,7 +351,7 @@ ; CHECK-NEXT: mov 
w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv2i64( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv2i64( %zd, %pg, i32 0, i32 0) ret %res } @@ -361,7 +361,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za0h.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.horiz.nxv2f64( %zd, %pg, i64 0, i32 0) + %res = call @llvm.aarch64.sme.readq.horiz.nxv2f64( %zd, %pg, i32 0, i32 0) ret %res } @@ -371,7 +371,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv16i8( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv16i8( %zd, %pg, i32 15, i32 0) ret %res } @@ -381,7 +381,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv8i16( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv8i16( %zd, %pg, i32 15, i32 0) ret %res } @@ -391,7 +391,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv8f16( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv8f16( %zd, %pg, i32 15, i32 0) ret %res } @@ -401,7 +401,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv4i32( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv4i32( %zd, %pg, i32 15, i32 0) ret %res } @@ -411,7 +411,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv4f32( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv4f32( %zd, %pg, i32 15, i32 0) ret %res } @@ -421,7 +421,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: 
ret - %res = call @llvm.aarch64.sme.readq.vert.nxv2i64( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv2i64( %zd, %pg, i32 15, i32 0) ret %res } @@ -431,7 +431,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov z0.q, p0/m, za15v.q[w12, 0] ; CHECK-NEXT: ret - %res = call @llvm.aarch64.sme.readq.vert.nxv2f64( %zd, %pg, i64 15, i32 0) + %res = call @llvm.aarch64.sme.readq.vert.nxv2f64( %zd, %pg, i32 15, i32 0) ret %res } @@ -461,9 +461,9 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %base) - %z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %add1) - %z2 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i64 0, i32 %add2) + %z0 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %base) + %z1 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %add1) + %z2 = call @llvm.aarch64.sme.read.horiz.nxv4i32( zeroinitializer, %pg, i32 0, i32 %add2) %inc = add nuw nsw i32 %i, 3 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %exit, label %for.body @@ -474,36 +474,36 @@ ret %res } -declare @llvm.aarch64.sme.read.horiz.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv4i32(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv4f32(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.read.horiz.nxv2f64(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv4i32(, , i64, i32) 
-declare @llvm.aarch64.sme.read.vert.nxv4f32(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.read.vert.nxv2f64(, , i64, i32) +declare @llvm.aarch64.sme.read.horiz.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv2i64(, , i32, i32) +declare @llvm.aarch64.sme.read.horiz.nxv2f64(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv2i64(, , i32, i32) +declare @llvm.aarch64.sme.read.vert.nxv2f64(, , i32, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv4i32(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv4f32(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.readq.horiz.nxv2f64(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv16i8(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv8i16(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv8f16(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv8bf16(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv4i32(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv4f32(, , i64, i32) -declare 
@llvm.aarch64.sme.readq.vert.nxv2i64(, , i64, i32) -declare @llvm.aarch64.sme.readq.vert.nxv2f64(, , i64, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv2i64(, , i32, i32) +declare @llvm.aarch64.sme.readq.horiz.nxv2f64(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv16i8(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv8i16(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv8f16(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv8bf16(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv4i32(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv4f32(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv2i64(, , i32, i32) +declare @llvm.aarch64.sme.readq.vert.nxv2f64(, , i32, i32) diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll @@ -18,21 +18,21 @@ %z2, %z3, %z4, %z5, %z6, %z7) { - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice, %pg, %z0) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.2, %pg, %z1) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.2, %pg, %z1) %tileslice.4 = add i32 %tileslice, 4 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.4, %pg, %z2) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.4, %pg, %z2) %tileslice.6 = add i32 %tileslice, 
6 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.6, %pg, %z3) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.6, %pg, %z3) %tileslice.8 = add i32 %tileslice, 8 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.8, %pg, %z4) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.8, %pg, %z4) %tileslice.10 = add i32 %tileslice, 10 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.10, %pg, %z5) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.10, %pg, %z5) %tileslice.12 = add i32 %tileslice, 12 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.12, %pg, %z6) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.12, %pg, %z6) %tileslice.14 = add i32 %tileslice, 14 - call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 %tileslice.14, %pg, %z7) + call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 %tileslice.14, %pg, %z7) ret void } @@ -54,21 +54,21 @@ %z4, %z5, %z6, %z7) { %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.1, %pg, %z0) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.1, %pg, %z0) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.3, %pg, %z1) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.3, %pg, %z1) %tileslice.5 = add i32 %tileslice, 5 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.5, %pg, %z2) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.5, %pg, %z2) %tileslice.7 = add i32 %tileslice, 7 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.7, %pg, %z3) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.7, %pg, %z3) %tileslice.9 = add i32 %tileslice, 9 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.9, 
%pg, %z4) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.9, %pg, %z4) %tileslice.11 = add i32 %tileslice, 11 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.11, %pg, %z5) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.11, %pg, %z5) %tileslice.13 = add i32 %tileslice, 13 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.13, %pg, %z6) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.13, %pg, %z6) %tileslice.15 = add i32 %tileslice, 15 - call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 %tileslice.15, %pg, %z7) + call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 %tileslice.15, %pg, %z7) ret void } @@ -85,13 +85,13 @@ %z2, %z3, %z4, %z5, %z6, %z7) { - call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice, %pg, %z0) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.2, %pg, %z2) + call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.2, %pg, %z2) %tileslice.4 = add i32 %tileslice, 4 - call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.4, %pg, %z4) + call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.4, %pg, %z4) %tileslice.6 = add i32 %tileslice, 6 - call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 %tileslice.6, %pg, %z6) + call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 %tileslice.6, %pg, %z6) ret void } @@ -109,13 +109,13 @@ %z4, %z5, %z6, %z7) { %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.1, %pg, %z1) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.3, %pg, %z3) + call void 
@llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.3, %pg, %z3) %tileslice.5 = add i32 %tileslice, 5 - call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.5, %pg, %z5) + call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.5, %pg, %z5) %tileslice.7 = add i32 %tileslice, 7 - call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 %tileslice.7, %pg, %z7) + call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 %tileslice.7, %pg, %z7) ret void } @@ -136,21 +136,21 @@ %z2, %z3, %z4, %z5, %z6, %z7) { - call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice, %pg, %z0) %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.1, %pg, %z1) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.2, %pg, %z2) + call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.2, %pg, %z2) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.3, %pg, %z3) + call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.3, %pg, %z3) %tileslice.4 = add i32 %tileslice, 4 - call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.4, %pg, %z4) + call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.4, %pg, %z4) %tileslice.5 = add i32 %tileslice, 5 - call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 %tileslice.5, %pg, %z5) + call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 %tileslice.5, %pg, %z5) %tileslice.6 = add i32 %tileslice, 6 - call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.6, %pg, %z6) + call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.6, %pg, %z6) %tileslice.7 = add i32 %tileslice, 7 - call void 
@llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 %tileslice.7, %pg, %z7) + call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 %tileslice.7, %pg, %z7) ret void } @@ -171,21 +171,21 @@ %z2, %z3, %z4, %z5, %z6, %z7) { - call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice, %pg, %z0) %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.1, %pg, %z1) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.2, %pg, %z2) + call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.2, %pg, %z2) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.3, %pg, %z3) + call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.3, %pg, %z3) %tileslice.4 = add i32 %tileslice, 4 - call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.4, %pg, %z4) + call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.4, %pg, %z4) %tileslice.5 = add i32 %tileslice, 5 - call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 %tileslice.5, %pg, %z5) + call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 %tileslice.5, %pg, %z5) %tileslice.6 = add i32 %tileslice, 6 - call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.6, %pg, %z6) + call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.6, %pg, %z6) %tileslice.7 = add i32 %tileslice, 7 - call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 %tileslice.7, %pg, %z7) + call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 %tileslice.7, %pg, %z7) ret void } @@ -198,9 +198,9 @@ ; CHECK-NEXT: ret %z0, %z1, %z2, %z3) { - call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice, %pg, %z0) + 
call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice, %pg, %z0) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %tileslice.2, %pg, %z2) + call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %tileslice.2, %pg, %z2) ret void } @@ -214,9 +214,9 @@ %z0, %z1, %z2, %z3) { %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.1, %pg, %z1) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 %tileslice.3, %pg, %z3) + call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 %tileslice.3, %pg, %z3) ret void } @@ -231,13 +231,13 @@ ; CHECK-NEXT: ret %z0, %z1, %z2, %z3) { - call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice, %pg, %z0) %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 %tileslice.1, %pg, %z1) %tileslice.2 = add i32 %tileslice, 2 - call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.2, %pg, %z2) + call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.2, %pg, %z2) %tileslice.3 = add i32 %tileslice, 3 - call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 %tileslice.3, %pg, %z3) + call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 %tileslice.3, %pg, %z3) ret void } @@ -248,7 +248,7 @@ ; CHECK-NEXT: mov za0h.d[w12, 0], p0/m, z0.d ; CHECK-NEXT: ret %z0, %z1) { - call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 %tileslice, %pg, %z0) ret void } @@ -260,7 +260,7 @@ ; CHECK-NEXT: ret %z0, %z1) { %tileslice.1 = add i32 %tileslice, 1 - call void 
@llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 %tileslice.1, %pg, %z1) ret void } @@ -272,9 +272,9 @@ ; CHECK-NEXT: mov za0v.d[w12, 1], p0/m, z1.d ; CHECK-NEXT: ret %z0, %z1) { - call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 0, i32 %tileslice, %pg, %z0) + call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 %tileslice, %pg, %z0) %tileslice.1 = add i32 %tileslice, 1 - call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 0, i32 %tileslice.1, %pg, %z1) + call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 %tileslice.1, %pg, %z1) ret void } @@ -284,7 +284,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 0, %pg, %zn) ret void } @@ -294,7 +294,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 0, %pg, %zn) ret void } @@ -304,7 +304,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 0, %pg, %zn) ret void } @@ -314,7 +314,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 0, %pg, %zn) ret void } @@ -324,7 +324,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 0, %pg, %zn) ret void 
} @@ -334,7 +334,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 0, %pg, %zn) ret void } @@ -344,7 +344,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 0, %pg, %zn) ret void } @@ -354,7 +354,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za0h.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 0, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 0, %pg, %zn) ret void } @@ -364,7 +364,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 0, %pg, %zn) ret void } @@ -374,7 +374,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 0, %pg, %zn) ret void } @@ -384,7 +384,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 0, %pg, %zn) ret void } @@ -394,7 +394,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 0, %pg, %zn) ret void } @@ -404,7 +404,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; 
CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 0, %pg, %zn) ret void } @@ -414,7 +414,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 0, %pg, %zn) ret void } @@ -424,7 +424,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 0, %pg, %zn) ret void } @@ -434,7 +434,7 @@ ; CHECK-NEXT: mov w12, wzr ; CHECK-NEXT: mov za15v.q[w12, 0], p0/m, z0.q ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 15, i32 0, %pg, %zn) + call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 0, %pg, %zn) ret void } @@ -459,9 +459,9 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %base, %pg, zeroinitializer) - call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add1, %pg, zeroinitializer) - call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 %add2, %pg, zeroinitializer) + call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %base, %pg, zeroinitializer) + call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add1, %pg, zeroinitializer) + call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 %add2, %pg, zeroinitializer) %inc = add nuw nsw i32 %i, 3 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %exit, label %for.body @@ -470,36 +470,36 @@ ret void } -declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv8i16(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i64, i32, , ) -declare void 
@llvm.aarch64.sme.write.horiz.nxv8bf16(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i64, i32, , ) -declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv16i8(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv8i16(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv8f16(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv4i32(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv4f32(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv2i64(i64, i32, , ) -declare void @llvm.aarch64.sme.write.vert.nxv2f64(i64, i32, , ) - -declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64, i32, , ) -declare void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv16i8(i32, i32, , ) +declare void 
@llvm.aarch64.sme.write.horiz.nxv8i16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv8f16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv4i32(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv4f32(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv2i64(i32, i32, , ) +declare void @llvm.aarch64.sme.write.horiz.nxv2f64(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv16i8(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv8i16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv8f16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv8bf16(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv4i32(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv4f32(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv2i64(i32, i32, , ) +declare void @llvm.aarch64.sme.write.vert.nxv2f64(i32, i32, , ) + +declare void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32, i32, , ) +declare void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32, i32, , ) +declare void 
@llvm.aarch64.sme.writeq.vert.nxv2f64(i32, i32, , ) diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: st1b {za0v.b[w13, 0]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %ptr, i64 0, i32 0) + call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %ptr, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %ptr, i32 0, i32 0) ret void; } @@ -25,8 +25,8 @@ ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 - call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %base, i64 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1b.horiz( %pg, ptr %base, i32 0, i32 0) + call void @llvm.aarch64.sme.st1b.vert( %pg, ptr %base, i32 0, i32 %tileslice) ret void; } @@ -41,10 +41,10 @@ ; CHECK-NEXT: st1h {za1v.h[w12, 7]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 7 - call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i64 1, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %ptr, i32 1, i32 %tileslice) ret void; } @@ -58,8 +58,8 @@ ; CHECK-NEXT: ret %base = getelementptr i16, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 7 - 
call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %base, i64 1, i32 0) + call void @llvm.aarch64.sme.st1h.horiz( %pg, ptr %base, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1h.vert( %pg, ptr %base, i32 1, i32 0) ret void; } @@ -78,14 +78,14 @@ ; CHECK-NEXT: st1w {za3v.s[w13, 0]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i64 3, i32 %tileslice) - call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 2, i32 %tileslice) - call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i64 3, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %ptr, i32 3, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i32 2, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %ptr, i32 3, i32 0) ret void; } @@ -99,8 +99,8 @@ ; CHECK-NEXT: ret %base = getelementptr i32, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 3 - call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1w.vert( %pg, ptr %base, i64 3, i32 %tileslice) + call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %base, i32 0, i32 0) + call void @llvm.aarch64.sme.st1w.vert( %pg, ptr 
%base, i32 3, i32 %tileslice) ret void; } @@ -127,22 +127,22 @@ ; CHECK-NEXT: st1d {za7v.d[w12, 1]}, p0, [x0] ; CHECK-NEXT: ret %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 4, i32 %tileslice) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i64 7, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 4, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1d.vert( 
%pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 4, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %ptr, i32 7, i32 %tileslice) ret void; } @@ -156,8 +156,8 @@ ; CHECK-NEXT: ret %base = getelementptr i64, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 1 - call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %base, i64 0, i32 %tileslice) - call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %base, i64 7, i32 0) + call void @llvm.aarch64.sme.st1d.horiz( %pg, ptr %base, i32 0, i32 %tileslice) + call void @llvm.aarch64.sme.st1d.vert( %pg, ptr %base, i32 7, i32 0) ret void; } @@ -198,38 +198,38 @@ ; CHECK-NEXT: st1q {za14v.q[w12, 0]}, p0, [x0] ; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0] ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 12, i32 0) - call void 
@llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i64 15, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 1, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 2, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 3, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 4, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 5, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 6, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 7, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 8, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 9, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 10, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 11, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 12, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 13, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 14, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i64 15, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 4, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 8, i32 0) + call void 
@llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 9, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 10, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 11, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 12, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 13, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 14, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %ptr, i32 15, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 0, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 1, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 2, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 3, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 4, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 5, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 6, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 7, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 8, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 9, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 10, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 11, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 12, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 13, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 14, i32 0) + call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %ptr, i32 15, i32 0) ret void; } @@ -241,8 +241,8 @@ ; CHECK-NEXT: st1q {za15v.q[w12, 0]}, p0, [x0, x1, lsl #4] ; CHECK-NEXT: ret %base = getelementptr i128, ptr %ptr, i64 %index - call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %base, i64 0, i32 0) - call void @llvm.aarch64.sme.st1q.vert( %pg, ptr %base, i64 15, i32 0) + call void @llvm.aarch64.sme.st1q.horiz( %pg, ptr %base, i32 0, i32 0) + call 
void @llvm.aarch64.sme.st1q.vert( %pg, ptr %base, i32 15, i32 0) ret void; } @@ -317,9 +317,9 @@ for.body: %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] - tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i64 0, i32 %base) - tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i64 0, i32 %add0) - tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i64 0, i32 %add1) + tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i32 0, i32 %base) + tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i32 0, i32 %add0) + tail call void @llvm.aarch64.sme.st1w.horiz( %pg, ptr %src, i32 0, i32 %add1) %inc = add nuw nsw i32 %i, 1 %exitcond.not = icmp eq i32 %inc, %N br i1 %exitcond.not, label %exit, label %for.body @@ -328,16 +328,16 @@ ret void } -declare void @llvm.aarch64.sme.st1b.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1h.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1w.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1d.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1q.horiz(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1b.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1h.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1w.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1d.vert(, ptr, i64, i32) -declare void @llvm.aarch64.sme.st1q.vert(, ptr, i64, i32) +declare void @llvm.aarch64.sme.st1b.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1h.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1w.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1d.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1q.horiz(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1b.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1h.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1w.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1d.vert(, ptr, i32, i32) +declare void @llvm.aarch64.sme.st1q.vert(, ptr, i32, i32) declare void 
@llvm.aarch64.sme.str(i32, ptr) declare i64 @llvm.vscale.i64() diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-zero.ll @@ -262,263 +262,263 @@ ; CHECK-NEXT: zero {za1.d, za2.d, za3.d, za4.d, za5.d, za6.d, za7.d} ; CHECK-NEXT: zero {za} ; CHECK-NEXT: ret - call void @llvm.aarch64.sme.zero(i64 0) - call void @llvm.aarch64.sme.zero(i64 1) - call void @llvm.aarch64.sme.zero(i64 2) - call void @llvm.aarch64.sme.zero(i64 3) - call void @llvm.aarch64.sme.zero(i64 4) - call void @llvm.aarch64.sme.zero(i64 5) - call void @llvm.aarch64.sme.zero(i64 6) - call void @llvm.aarch64.sme.zero(i64 7) - call void @llvm.aarch64.sme.zero(i64 8) - call void @llvm.aarch64.sme.zero(i64 9) - call void @llvm.aarch64.sme.zero(i64 10) - call void @llvm.aarch64.sme.zero(i64 11) - call void @llvm.aarch64.sme.zero(i64 12) - call void @llvm.aarch64.sme.zero(i64 13) - call void @llvm.aarch64.sme.zero(i64 14) - call void @llvm.aarch64.sme.zero(i64 15) - call void @llvm.aarch64.sme.zero(i64 16) - call void @llvm.aarch64.sme.zero(i64 17) - call void @llvm.aarch64.sme.zero(i64 18) - call void @llvm.aarch64.sme.zero(i64 19) - call void @llvm.aarch64.sme.zero(i64 20) - call void @llvm.aarch64.sme.zero(i64 21) - call void @llvm.aarch64.sme.zero(i64 22) - call void @llvm.aarch64.sme.zero(i64 23) - call void @llvm.aarch64.sme.zero(i64 24) - call void @llvm.aarch64.sme.zero(i64 25) - call void @llvm.aarch64.sme.zero(i64 26) - call void @llvm.aarch64.sme.zero(i64 27) - call void @llvm.aarch64.sme.zero(i64 28) - call void @llvm.aarch64.sme.zero(i64 29) - call void @llvm.aarch64.sme.zero(i64 30) - call void @llvm.aarch64.sme.zero(i64 31) - call void @llvm.aarch64.sme.zero(i64 32) - call void @llvm.aarch64.sme.zero(i64 33) - call void @llvm.aarch64.sme.zero(i64 34) - call void @llvm.aarch64.sme.zero(i64 35) - call void @llvm.aarch64.sme.zero(i64 
36) - call void @llvm.aarch64.sme.zero(i64 37) - call void @llvm.aarch64.sme.zero(i64 38) - call void @llvm.aarch64.sme.zero(i64 39) - call void @llvm.aarch64.sme.zero(i64 40) - call void @llvm.aarch64.sme.zero(i64 41) - call void @llvm.aarch64.sme.zero(i64 42) - call void @llvm.aarch64.sme.zero(i64 43) - call void @llvm.aarch64.sme.zero(i64 44) - call void @llvm.aarch64.sme.zero(i64 45) - call void @llvm.aarch64.sme.zero(i64 46) - call void @llvm.aarch64.sme.zero(i64 47) - call void @llvm.aarch64.sme.zero(i64 48) - call void @llvm.aarch64.sme.zero(i64 49) - call void @llvm.aarch64.sme.zero(i64 50) - call void @llvm.aarch64.sme.zero(i64 51) - call void @llvm.aarch64.sme.zero(i64 52) - call void @llvm.aarch64.sme.zero(i64 53) - call void @llvm.aarch64.sme.zero(i64 54) - call void @llvm.aarch64.sme.zero(i64 55) - call void @llvm.aarch64.sme.zero(i64 56) - call void @llvm.aarch64.sme.zero(i64 57) - call void @llvm.aarch64.sme.zero(i64 58) - call void @llvm.aarch64.sme.zero(i64 59) - call void @llvm.aarch64.sme.zero(i64 60) - call void @llvm.aarch64.sme.zero(i64 61) - call void @llvm.aarch64.sme.zero(i64 62) - call void @llvm.aarch64.sme.zero(i64 63) - call void @llvm.aarch64.sme.zero(i64 64) - call void @llvm.aarch64.sme.zero(i64 65) - call void @llvm.aarch64.sme.zero(i64 66) - call void @llvm.aarch64.sme.zero(i64 67) - call void @llvm.aarch64.sme.zero(i64 68) - call void @llvm.aarch64.sme.zero(i64 69) - call void @llvm.aarch64.sme.zero(i64 70) - call void @llvm.aarch64.sme.zero(i64 71) - call void @llvm.aarch64.sme.zero(i64 72) - call void @llvm.aarch64.sme.zero(i64 73) - call void @llvm.aarch64.sme.zero(i64 74) - call void @llvm.aarch64.sme.zero(i64 75) - call void @llvm.aarch64.sme.zero(i64 76) - call void @llvm.aarch64.sme.zero(i64 77) - call void @llvm.aarch64.sme.zero(i64 78) - call void @llvm.aarch64.sme.zero(i64 79) - call void @llvm.aarch64.sme.zero(i64 80) - call void @llvm.aarch64.sme.zero(i64 81) - call void @llvm.aarch64.sme.zero(i64 82) - call void 
@llvm.aarch64.sme.zero(i64 83) - call void @llvm.aarch64.sme.zero(i64 84) - call void @llvm.aarch64.sme.zero(i64 85) - call void @llvm.aarch64.sme.zero(i64 86) - call void @llvm.aarch64.sme.zero(i64 87) - call void @llvm.aarch64.sme.zero(i64 88) - call void @llvm.aarch64.sme.zero(i64 89) - call void @llvm.aarch64.sme.zero(i64 90) - call void @llvm.aarch64.sme.zero(i64 91) - call void @llvm.aarch64.sme.zero(i64 92) - call void @llvm.aarch64.sme.zero(i64 93) - call void @llvm.aarch64.sme.zero(i64 94) - call void @llvm.aarch64.sme.zero(i64 95) - call void @llvm.aarch64.sme.zero(i64 96) - call void @llvm.aarch64.sme.zero(i64 97) - call void @llvm.aarch64.sme.zero(i64 98) - call void @llvm.aarch64.sme.zero(i64 99) - call void @llvm.aarch64.sme.zero(i64 100) - call void @llvm.aarch64.sme.zero(i64 101) - call void @llvm.aarch64.sme.zero(i64 102) - call void @llvm.aarch64.sme.zero(i64 103) - call void @llvm.aarch64.sme.zero(i64 104) - call void @llvm.aarch64.sme.zero(i64 105) - call void @llvm.aarch64.sme.zero(i64 106) - call void @llvm.aarch64.sme.zero(i64 107) - call void @llvm.aarch64.sme.zero(i64 108) - call void @llvm.aarch64.sme.zero(i64 109) - call void @llvm.aarch64.sme.zero(i64 110) - call void @llvm.aarch64.sme.zero(i64 111) - call void @llvm.aarch64.sme.zero(i64 112) - call void @llvm.aarch64.sme.zero(i64 113) - call void @llvm.aarch64.sme.zero(i64 114) - call void @llvm.aarch64.sme.zero(i64 115) - call void @llvm.aarch64.sme.zero(i64 116) - call void @llvm.aarch64.sme.zero(i64 117) - call void @llvm.aarch64.sme.zero(i64 118) - call void @llvm.aarch64.sme.zero(i64 119) - call void @llvm.aarch64.sme.zero(i64 120) - call void @llvm.aarch64.sme.zero(i64 121) - call void @llvm.aarch64.sme.zero(i64 122) - call void @llvm.aarch64.sme.zero(i64 123) - call void @llvm.aarch64.sme.zero(i64 124) - call void @llvm.aarch64.sme.zero(i64 125) - call void @llvm.aarch64.sme.zero(i64 126) - call void @llvm.aarch64.sme.zero(i64 127) - call void @llvm.aarch64.sme.zero(i64 128) - 
call void @llvm.aarch64.sme.zero(i64 129) - call void @llvm.aarch64.sme.zero(i64 130) - call void @llvm.aarch64.sme.zero(i64 131) - call void @llvm.aarch64.sme.zero(i64 132) - call void @llvm.aarch64.sme.zero(i64 133) - call void @llvm.aarch64.sme.zero(i64 134) - call void @llvm.aarch64.sme.zero(i64 135) - call void @llvm.aarch64.sme.zero(i64 136) - call void @llvm.aarch64.sme.zero(i64 137) - call void @llvm.aarch64.sme.zero(i64 138) - call void @llvm.aarch64.sme.zero(i64 139) - call void @llvm.aarch64.sme.zero(i64 140) - call void @llvm.aarch64.sme.zero(i64 141) - call void @llvm.aarch64.sme.zero(i64 142) - call void @llvm.aarch64.sme.zero(i64 143) - call void @llvm.aarch64.sme.zero(i64 144) - call void @llvm.aarch64.sme.zero(i64 145) - call void @llvm.aarch64.sme.zero(i64 146) - call void @llvm.aarch64.sme.zero(i64 147) - call void @llvm.aarch64.sme.zero(i64 148) - call void @llvm.aarch64.sme.zero(i64 149) - call void @llvm.aarch64.sme.zero(i64 150) - call void @llvm.aarch64.sme.zero(i64 151) - call void @llvm.aarch64.sme.zero(i64 152) - call void @llvm.aarch64.sme.zero(i64 153) - call void @llvm.aarch64.sme.zero(i64 154) - call void @llvm.aarch64.sme.zero(i64 155) - call void @llvm.aarch64.sme.zero(i64 156) - call void @llvm.aarch64.sme.zero(i64 157) - call void @llvm.aarch64.sme.zero(i64 158) - call void @llvm.aarch64.sme.zero(i64 159) - call void @llvm.aarch64.sme.zero(i64 160) - call void @llvm.aarch64.sme.zero(i64 161) - call void @llvm.aarch64.sme.zero(i64 162) - call void @llvm.aarch64.sme.zero(i64 163) - call void @llvm.aarch64.sme.zero(i64 164) - call void @llvm.aarch64.sme.zero(i64 165) - call void @llvm.aarch64.sme.zero(i64 166) - call void @llvm.aarch64.sme.zero(i64 167) - call void @llvm.aarch64.sme.zero(i64 168) - call void @llvm.aarch64.sme.zero(i64 169) - call void @llvm.aarch64.sme.zero(i64 170) - call void @llvm.aarch64.sme.zero(i64 171) - call void @llvm.aarch64.sme.zero(i64 172) - call void @llvm.aarch64.sme.zero(i64 173) - call void 
@llvm.aarch64.sme.zero(i64 174) - call void @llvm.aarch64.sme.zero(i64 175) - call void @llvm.aarch64.sme.zero(i64 176) - call void @llvm.aarch64.sme.zero(i64 177) - call void @llvm.aarch64.sme.zero(i64 178) - call void @llvm.aarch64.sme.zero(i64 179) - call void @llvm.aarch64.sme.zero(i64 180) - call void @llvm.aarch64.sme.zero(i64 181) - call void @llvm.aarch64.sme.zero(i64 182) - call void @llvm.aarch64.sme.zero(i64 183) - call void @llvm.aarch64.sme.zero(i64 184) - call void @llvm.aarch64.sme.zero(i64 185) - call void @llvm.aarch64.sme.zero(i64 186) - call void @llvm.aarch64.sme.zero(i64 187) - call void @llvm.aarch64.sme.zero(i64 188) - call void @llvm.aarch64.sme.zero(i64 189) - call void @llvm.aarch64.sme.zero(i64 190) - call void @llvm.aarch64.sme.zero(i64 191) - call void @llvm.aarch64.sme.zero(i64 192) - call void @llvm.aarch64.sme.zero(i64 193) - call void @llvm.aarch64.sme.zero(i64 194) - call void @llvm.aarch64.sme.zero(i64 195) - call void @llvm.aarch64.sme.zero(i64 196) - call void @llvm.aarch64.sme.zero(i64 197) - call void @llvm.aarch64.sme.zero(i64 198) - call void @llvm.aarch64.sme.zero(i64 199) - call void @llvm.aarch64.sme.zero(i64 200) - call void @llvm.aarch64.sme.zero(i64 201) - call void @llvm.aarch64.sme.zero(i64 202) - call void @llvm.aarch64.sme.zero(i64 203) - call void @llvm.aarch64.sme.zero(i64 204) - call void @llvm.aarch64.sme.zero(i64 205) - call void @llvm.aarch64.sme.zero(i64 206) - call void @llvm.aarch64.sme.zero(i64 207) - call void @llvm.aarch64.sme.zero(i64 208) - call void @llvm.aarch64.sme.zero(i64 209) - call void @llvm.aarch64.sme.zero(i64 210) - call void @llvm.aarch64.sme.zero(i64 211) - call void @llvm.aarch64.sme.zero(i64 212) - call void @llvm.aarch64.sme.zero(i64 213) - call void @llvm.aarch64.sme.zero(i64 214) - call void @llvm.aarch64.sme.zero(i64 215) - call void @llvm.aarch64.sme.zero(i64 216) - call void @llvm.aarch64.sme.zero(i64 217) - call void @llvm.aarch64.sme.zero(i64 218) - call void 
@llvm.aarch64.sme.zero(i64 219) - call void @llvm.aarch64.sme.zero(i64 220) - call void @llvm.aarch64.sme.zero(i64 221) - call void @llvm.aarch64.sme.zero(i64 222) - call void @llvm.aarch64.sme.zero(i64 223) - call void @llvm.aarch64.sme.zero(i64 224) - call void @llvm.aarch64.sme.zero(i64 225) - call void @llvm.aarch64.sme.zero(i64 226) - call void @llvm.aarch64.sme.zero(i64 227) - call void @llvm.aarch64.sme.zero(i64 228) - call void @llvm.aarch64.sme.zero(i64 229) - call void @llvm.aarch64.sme.zero(i64 230) - call void @llvm.aarch64.sme.zero(i64 231) - call void @llvm.aarch64.sme.zero(i64 232) - call void @llvm.aarch64.sme.zero(i64 233) - call void @llvm.aarch64.sme.zero(i64 234) - call void @llvm.aarch64.sme.zero(i64 235) - call void @llvm.aarch64.sme.zero(i64 236) - call void @llvm.aarch64.sme.zero(i64 237) - call void @llvm.aarch64.sme.zero(i64 238) - call void @llvm.aarch64.sme.zero(i64 239) - call void @llvm.aarch64.sme.zero(i64 240) - call void @llvm.aarch64.sme.zero(i64 241) - call void @llvm.aarch64.sme.zero(i64 242) - call void @llvm.aarch64.sme.zero(i64 243) - call void @llvm.aarch64.sme.zero(i64 244) - call void @llvm.aarch64.sme.zero(i64 245) - call void @llvm.aarch64.sme.zero(i64 246) - call void @llvm.aarch64.sme.zero(i64 247) - call void @llvm.aarch64.sme.zero(i64 248) - call void @llvm.aarch64.sme.zero(i64 249) - call void @llvm.aarch64.sme.zero(i64 250) - call void @llvm.aarch64.sme.zero(i64 251) - call void @llvm.aarch64.sme.zero(i64 252) - call void @llvm.aarch64.sme.zero(i64 253) - call void @llvm.aarch64.sme.zero(i64 254) - call void @llvm.aarch64.sme.zero(i64 255) + call void @llvm.aarch64.sme.zero(i32 0) + call void @llvm.aarch64.sme.zero(i32 1) + call void @llvm.aarch64.sme.zero(i32 2) + call void @llvm.aarch64.sme.zero(i32 3) + call void @llvm.aarch64.sme.zero(i32 4) + call void @llvm.aarch64.sme.zero(i32 5) + call void @llvm.aarch64.sme.zero(i32 6) + call void @llvm.aarch64.sme.zero(i32 7) + call void @llvm.aarch64.sme.zero(i32 8) + 
call void @llvm.aarch64.sme.zero(i32 9) + call void @llvm.aarch64.sme.zero(i32 10) + call void @llvm.aarch64.sme.zero(i32 11) + call void @llvm.aarch64.sme.zero(i32 12) + call void @llvm.aarch64.sme.zero(i32 13) + call void @llvm.aarch64.sme.zero(i32 14) + call void @llvm.aarch64.sme.zero(i32 15) + call void @llvm.aarch64.sme.zero(i32 16) + call void @llvm.aarch64.sme.zero(i32 17) + call void @llvm.aarch64.sme.zero(i32 18) + call void @llvm.aarch64.sme.zero(i32 19) + call void @llvm.aarch64.sme.zero(i32 20) + call void @llvm.aarch64.sme.zero(i32 21) + call void @llvm.aarch64.sme.zero(i32 22) + call void @llvm.aarch64.sme.zero(i32 23) + call void @llvm.aarch64.sme.zero(i32 24) + call void @llvm.aarch64.sme.zero(i32 25) + call void @llvm.aarch64.sme.zero(i32 26) + call void @llvm.aarch64.sme.zero(i32 27) + call void @llvm.aarch64.sme.zero(i32 28) + call void @llvm.aarch64.sme.zero(i32 29) + call void @llvm.aarch64.sme.zero(i32 30) + call void @llvm.aarch64.sme.zero(i32 31) + call void @llvm.aarch64.sme.zero(i32 32) + call void @llvm.aarch64.sme.zero(i32 33) + call void @llvm.aarch64.sme.zero(i32 34) + call void @llvm.aarch64.sme.zero(i32 35) + call void @llvm.aarch64.sme.zero(i32 36) + call void @llvm.aarch64.sme.zero(i32 37) + call void @llvm.aarch64.sme.zero(i32 38) + call void @llvm.aarch64.sme.zero(i32 39) + call void @llvm.aarch64.sme.zero(i32 40) + call void @llvm.aarch64.sme.zero(i32 41) + call void @llvm.aarch64.sme.zero(i32 42) + call void @llvm.aarch64.sme.zero(i32 43) + call void @llvm.aarch64.sme.zero(i32 44) + call void @llvm.aarch64.sme.zero(i32 45) + call void @llvm.aarch64.sme.zero(i32 46) + call void @llvm.aarch64.sme.zero(i32 47) + call void @llvm.aarch64.sme.zero(i32 48) + call void @llvm.aarch64.sme.zero(i32 49) + call void @llvm.aarch64.sme.zero(i32 50) + call void @llvm.aarch64.sme.zero(i32 51) + call void @llvm.aarch64.sme.zero(i32 52) + call void @llvm.aarch64.sme.zero(i32 53) + call void @llvm.aarch64.sme.zero(i32 54) + call void 
@llvm.aarch64.sme.zero(i32 55) + call void @llvm.aarch64.sme.zero(i32 56) + call void @llvm.aarch64.sme.zero(i32 57) + call void @llvm.aarch64.sme.zero(i32 58) + call void @llvm.aarch64.sme.zero(i32 59) + call void @llvm.aarch64.sme.zero(i32 60) + call void @llvm.aarch64.sme.zero(i32 61) + call void @llvm.aarch64.sme.zero(i32 62) + call void @llvm.aarch64.sme.zero(i32 63) + call void @llvm.aarch64.sme.zero(i32 64) + call void @llvm.aarch64.sme.zero(i32 65) + call void @llvm.aarch64.sme.zero(i32 66) + call void @llvm.aarch64.sme.zero(i32 67) + call void @llvm.aarch64.sme.zero(i32 68) + call void @llvm.aarch64.sme.zero(i32 69) + call void @llvm.aarch64.sme.zero(i32 70) + call void @llvm.aarch64.sme.zero(i32 71) + call void @llvm.aarch64.sme.zero(i32 72) + call void @llvm.aarch64.sme.zero(i32 73) + call void @llvm.aarch64.sme.zero(i32 74) + call void @llvm.aarch64.sme.zero(i32 75) + call void @llvm.aarch64.sme.zero(i32 76) + call void @llvm.aarch64.sme.zero(i32 77) + call void @llvm.aarch64.sme.zero(i32 78) + call void @llvm.aarch64.sme.zero(i32 79) + call void @llvm.aarch64.sme.zero(i32 80) + call void @llvm.aarch64.sme.zero(i32 81) + call void @llvm.aarch64.sme.zero(i32 82) + call void @llvm.aarch64.sme.zero(i32 83) + call void @llvm.aarch64.sme.zero(i32 84) + call void @llvm.aarch64.sme.zero(i32 85) + call void @llvm.aarch64.sme.zero(i32 86) + call void @llvm.aarch64.sme.zero(i32 87) + call void @llvm.aarch64.sme.zero(i32 88) + call void @llvm.aarch64.sme.zero(i32 89) + call void @llvm.aarch64.sme.zero(i32 90) + call void @llvm.aarch64.sme.zero(i32 91) + call void @llvm.aarch64.sme.zero(i32 92) + call void @llvm.aarch64.sme.zero(i32 93) + call void @llvm.aarch64.sme.zero(i32 94) + call void @llvm.aarch64.sme.zero(i32 95) + call void @llvm.aarch64.sme.zero(i32 96) + call void @llvm.aarch64.sme.zero(i32 97) + call void @llvm.aarch64.sme.zero(i32 98) + call void @llvm.aarch64.sme.zero(i32 99) + call void @llvm.aarch64.sme.zero(i32 100) + call void 
@llvm.aarch64.sme.zero(i32 101) + call void @llvm.aarch64.sme.zero(i32 102) + call void @llvm.aarch64.sme.zero(i32 103) + call void @llvm.aarch64.sme.zero(i32 104) + call void @llvm.aarch64.sme.zero(i32 105) + call void @llvm.aarch64.sme.zero(i32 106) + call void @llvm.aarch64.sme.zero(i32 107) + call void @llvm.aarch64.sme.zero(i32 108) + call void @llvm.aarch64.sme.zero(i32 109) + call void @llvm.aarch64.sme.zero(i32 110) + call void @llvm.aarch64.sme.zero(i32 111) + call void @llvm.aarch64.sme.zero(i32 112) + call void @llvm.aarch64.sme.zero(i32 113) + call void @llvm.aarch64.sme.zero(i32 114) + call void @llvm.aarch64.sme.zero(i32 115) + call void @llvm.aarch64.sme.zero(i32 116) + call void @llvm.aarch64.sme.zero(i32 117) + call void @llvm.aarch64.sme.zero(i32 118) + call void @llvm.aarch64.sme.zero(i32 119) + call void @llvm.aarch64.sme.zero(i32 120) + call void @llvm.aarch64.sme.zero(i32 121) + call void @llvm.aarch64.sme.zero(i32 122) + call void @llvm.aarch64.sme.zero(i32 123) + call void @llvm.aarch64.sme.zero(i32 124) + call void @llvm.aarch64.sme.zero(i32 125) + call void @llvm.aarch64.sme.zero(i32 126) + call void @llvm.aarch64.sme.zero(i32 127) + call void @llvm.aarch64.sme.zero(i32 128) + call void @llvm.aarch64.sme.zero(i32 129) + call void @llvm.aarch64.sme.zero(i32 130) + call void @llvm.aarch64.sme.zero(i32 131) + call void @llvm.aarch64.sme.zero(i32 132) + call void @llvm.aarch64.sme.zero(i32 133) + call void @llvm.aarch64.sme.zero(i32 134) + call void @llvm.aarch64.sme.zero(i32 135) + call void @llvm.aarch64.sme.zero(i32 136) + call void @llvm.aarch64.sme.zero(i32 137) + call void @llvm.aarch64.sme.zero(i32 138) + call void @llvm.aarch64.sme.zero(i32 139) + call void @llvm.aarch64.sme.zero(i32 140) + call void @llvm.aarch64.sme.zero(i32 141) + call void @llvm.aarch64.sme.zero(i32 142) + call void @llvm.aarch64.sme.zero(i32 143) + call void @llvm.aarch64.sme.zero(i32 144) + call void @llvm.aarch64.sme.zero(i32 145) + call void 
@llvm.aarch64.sme.zero(i32 146) + call void @llvm.aarch64.sme.zero(i32 147) + call void @llvm.aarch64.sme.zero(i32 148) + call void @llvm.aarch64.sme.zero(i32 149) + call void @llvm.aarch64.sme.zero(i32 150) + call void @llvm.aarch64.sme.zero(i32 151) + call void @llvm.aarch64.sme.zero(i32 152) + call void @llvm.aarch64.sme.zero(i32 153) + call void @llvm.aarch64.sme.zero(i32 154) + call void @llvm.aarch64.sme.zero(i32 155) + call void @llvm.aarch64.sme.zero(i32 156) + call void @llvm.aarch64.sme.zero(i32 157) + call void @llvm.aarch64.sme.zero(i32 158) + call void @llvm.aarch64.sme.zero(i32 159) + call void @llvm.aarch64.sme.zero(i32 160) + call void @llvm.aarch64.sme.zero(i32 161) + call void @llvm.aarch64.sme.zero(i32 162) + call void @llvm.aarch64.sme.zero(i32 163) + call void @llvm.aarch64.sme.zero(i32 164) + call void @llvm.aarch64.sme.zero(i32 165) + call void @llvm.aarch64.sme.zero(i32 166) + call void @llvm.aarch64.sme.zero(i32 167) + call void @llvm.aarch64.sme.zero(i32 168) + call void @llvm.aarch64.sme.zero(i32 169) + call void @llvm.aarch64.sme.zero(i32 170) + call void @llvm.aarch64.sme.zero(i32 171) + call void @llvm.aarch64.sme.zero(i32 172) + call void @llvm.aarch64.sme.zero(i32 173) + call void @llvm.aarch64.sme.zero(i32 174) + call void @llvm.aarch64.sme.zero(i32 175) + call void @llvm.aarch64.sme.zero(i32 176) + call void @llvm.aarch64.sme.zero(i32 177) + call void @llvm.aarch64.sme.zero(i32 178) + call void @llvm.aarch64.sme.zero(i32 179) + call void @llvm.aarch64.sme.zero(i32 180) + call void @llvm.aarch64.sme.zero(i32 181) + call void @llvm.aarch64.sme.zero(i32 182) + call void @llvm.aarch64.sme.zero(i32 183) + call void @llvm.aarch64.sme.zero(i32 184) + call void @llvm.aarch64.sme.zero(i32 185) + call void @llvm.aarch64.sme.zero(i32 186) + call void @llvm.aarch64.sme.zero(i32 187) + call void @llvm.aarch64.sme.zero(i32 188) + call void @llvm.aarch64.sme.zero(i32 189) + call void @llvm.aarch64.sme.zero(i32 190) + call void 
@llvm.aarch64.sme.zero(i32 191) + call void @llvm.aarch64.sme.zero(i32 192) + call void @llvm.aarch64.sme.zero(i32 193) + call void @llvm.aarch64.sme.zero(i32 194) + call void @llvm.aarch64.sme.zero(i32 195) + call void @llvm.aarch64.sme.zero(i32 196) + call void @llvm.aarch64.sme.zero(i32 197) + call void @llvm.aarch64.sme.zero(i32 198) + call void @llvm.aarch64.sme.zero(i32 199) + call void @llvm.aarch64.sme.zero(i32 200) + call void @llvm.aarch64.sme.zero(i32 201) + call void @llvm.aarch64.sme.zero(i32 202) + call void @llvm.aarch64.sme.zero(i32 203) + call void @llvm.aarch64.sme.zero(i32 204) + call void @llvm.aarch64.sme.zero(i32 205) + call void @llvm.aarch64.sme.zero(i32 206) + call void @llvm.aarch64.sme.zero(i32 207) + call void @llvm.aarch64.sme.zero(i32 208) + call void @llvm.aarch64.sme.zero(i32 209) + call void @llvm.aarch64.sme.zero(i32 210) + call void @llvm.aarch64.sme.zero(i32 211) + call void @llvm.aarch64.sme.zero(i32 212) + call void @llvm.aarch64.sme.zero(i32 213) + call void @llvm.aarch64.sme.zero(i32 214) + call void @llvm.aarch64.sme.zero(i32 215) + call void @llvm.aarch64.sme.zero(i32 216) + call void @llvm.aarch64.sme.zero(i32 217) + call void @llvm.aarch64.sme.zero(i32 218) + call void @llvm.aarch64.sme.zero(i32 219) + call void @llvm.aarch64.sme.zero(i32 220) + call void @llvm.aarch64.sme.zero(i32 221) + call void @llvm.aarch64.sme.zero(i32 222) + call void @llvm.aarch64.sme.zero(i32 223) + call void @llvm.aarch64.sme.zero(i32 224) + call void @llvm.aarch64.sme.zero(i32 225) + call void @llvm.aarch64.sme.zero(i32 226) + call void @llvm.aarch64.sme.zero(i32 227) + call void @llvm.aarch64.sme.zero(i32 228) + call void @llvm.aarch64.sme.zero(i32 229) + call void @llvm.aarch64.sme.zero(i32 230) + call void @llvm.aarch64.sme.zero(i32 231) + call void @llvm.aarch64.sme.zero(i32 232) + call void @llvm.aarch64.sme.zero(i32 233) + call void @llvm.aarch64.sme.zero(i32 234) + call void @llvm.aarch64.sme.zero(i32 235) + call void 
@llvm.aarch64.sme.zero(i32 236) + call void @llvm.aarch64.sme.zero(i32 237) + call void @llvm.aarch64.sme.zero(i32 238) + call void @llvm.aarch64.sme.zero(i32 239) + call void @llvm.aarch64.sme.zero(i32 240) + call void @llvm.aarch64.sme.zero(i32 241) + call void @llvm.aarch64.sme.zero(i32 242) + call void @llvm.aarch64.sme.zero(i32 243) + call void @llvm.aarch64.sme.zero(i32 244) + call void @llvm.aarch64.sme.zero(i32 245) + call void @llvm.aarch64.sme.zero(i32 246) + call void @llvm.aarch64.sme.zero(i32 247) + call void @llvm.aarch64.sme.zero(i32 248) + call void @llvm.aarch64.sme.zero(i32 249) + call void @llvm.aarch64.sme.zero(i32 250) + call void @llvm.aarch64.sme.zero(i32 251) + call void @llvm.aarch64.sme.zero(i32 252) + call void @llvm.aarch64.sme.zero(i32 253) + call void @llvm.aarch64.sme.zero(i32 254) + call void @llvm.aarch64.sme.zero(i32 255) ret void } -declare void @llvm.aarch64.sme.zero(i64) +declare void @llvm.aarch64.sme.zero(i32) diff --git a/llvm/test/CodeGen/AArch64/sme-invoke-resume-pstatesm.ll b/llvm/test/CodeGen/AArch64/sme-invoke-resume-pstatesm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-invoke-resume-pstatesm.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +define void @unconditional_smstart() nounwind { +; CHECK-LABEL: unconditional_smstart: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 1) + ret void +} + +; FIXME: This case can be optimised away, since we know the condition +; will evaluate to false. +define void @conditional_smstart() nounwind { +; CHECK-LABEL: conditional_smstart: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: tbz xzr, #0, .LBB1_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB1_2: +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 0) + ret void +} + +define void @conditional_smstart_with_entry_val(i64 %pstate) nounwind { +; CHECK-LABEL: conditional_smstart_with_entry_val: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d15, d14, [sp, #-64]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: tbz x0, #0, .LBB2_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB2_2: +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.invoke.resume.pstatesm(i64 %pstate) + ret void +} + +declare void @llvm.aarch64.sme.invoke.resume.pstatesm(i64) diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -3,6 +3,7 @@ declare void @private_za_callee() declare float @llvm.cos.f32(float) +declare float @llvm.sin.f32(float) ; Test lazy-save mechanism for a single callee. define void @test_lazy_save_1_callee() nounwind "aarch64_pstate_za_shared" { @@ -17,7 +18,7 @@ ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: str x9, [x29] +; CHECK-NEXT: stur x9, [x29, #-16] ; CHECK-NEXT: sturh w8, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl private_za_callee @@ -50,7 +51,7 @@ ; CHECK-NEXT: sub x8, x8, x19 ; CHECK-NEXT: mov sp, x8 ; CHECK-NEXT: sub x20, x29, #16 -; CHECK-NEXT: str x8, [x29] +; CHECK-NEXT: stur x8, [x29, #-16] ; CHECK-NEXT: sturh w19, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x20 ; CHECK-NEXT: bl private_za_callee @@ -86,17 +87,19 @@ define float @test_lazy_save_expanded_intrinsic(float %a) nounwind "aarch64_pstate_za_shared" { ; CHECK-LABEL: test_lazy_save_expanded_intrinsic: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: str x9, [x29] ; CHECK-NEXT: sturh w8, [x29, #-8] +; CHECK-NEXT: stur x9, [x29, #-16] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: smstart za @@ -107,13 +110,78 @@ ; CHECK-NEXT: bl __arm_tpidr2_restore ; CHECK-NEXT: .LBB2_2: ; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x19 ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call float @llvm.cos.f32(float %a) ret float %res } +; Test that we properly set-up and restore a lazy-save twice and +; in-sequence when we do a call to both a sin and cos intrinsic. +define float @test_lazy_save_expanded_intrinsic_two_calls(float %a) nounwind "aarch64_pstate_za_shared" { +; CHECK-LABEL: test_lazy_save_expanded_intrinsic_two_calls: +; CHECK: // %bb.0: +; CHECK-NEXT: stp d9, d8, [sp, #-48]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: rdsvl x8, #1 +; CHECK-NEXT: fmov s8, s0 +; CHECK-NEXT: mul x20, x8, x8 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: sub x8, x8, x20 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: sub x9, x29, #32 +; CHECK-NEXT: sturh w20, [x29, #-24] +; CHECK-NEXT: stur x8, [x29, #-32] +; CHECK-NEXT: msr TPIDR2_EL0, x9 +; CHECK-NEXT: bl cosf +; CHECK-NEXT: fmov s9, s0 +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #32 +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB3_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x19 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: sub x8, x8, x20 +; CHECK-NEXT: sub x8, x8, #16 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: sub x9, x29, #48 +; CHECK-NEXT: sturh w20, [x29, #-40] +; CHECK-NEXT: stur x8, [x29, #-48] +; CHECK-NEXT: msr TPIDR2_EL0, x9 +; CHECK-NEXT: bl sinf +; CHECK-NEXT: smstart za +; CHECK-NEXT: sub x0, x29, #48 +; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: cbnz x8, .LBB3_4 +; CHECK-NEXT: // %bb.3: +; CHECK-NEXT: bl __arm_tpidr2_restore +; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x19 +; CHECK-NEXT: fadd s0, s9, s0 +; CHECK-NEXT: sub sp, x29, #16 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp], #48 // 16-byte Folded Reload +; CHECK-NEXT: ret + %cos = call float @llvm.cos.f32(float %a) + %sin = call float @llvm.sin.f32(float %a) + %res = fadd float %cos, %sin + ret float %res +} + ; Test a combination of streaming-compatible -> normal call with lazy-save. 
define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: test_lazy_save_and_conditional_smstart: @@ -132,27 +200,27 @@ ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #80 -; CHECK-NEXT: stur x9, [x29, #-64] +; CHECK-NEXT: stur x9, [x29, #-80] ; CHECK-NEXT: sturh w8, [x29, #-72] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x19, x0, #0x1 -; CHECK-NEXT: tbz x19, #0, .LBB3_2 +; CHECK-NEXT: tbz x19, #0, .LBB4_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm -; CHECK-NEXT: .LBB3_2: +; CHECK-NEXT: .LBB4_2: ; CHECK-NEXT: bl private_za_callee -; CHECK-NEXT: tbz x19, #0, .LBB3_4 +; CHECK-NEXT: tbz x19, #0, .LBB4_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm -; CHECK-NEXT: .LBB3_4: +; CHECK-NEXT: .LBB4_4: ; CHECK-NEXT: smstart za ; CHECK-NEXT: sub x0, x29, #80 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-NEXT: cbnz x8, .LBB3_6 +; CHECK-NEXT: cbnz x8, .LBB4_6 ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: bl __arm_tpidr2_restore -; CHECK-NEXT: .LBB3_6: +; CHECK-NEXT: .LBB4_6: ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: sub sp, x29, #64 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll --- a/llvm/test/CodeGen/AArch64/sme-new-za-function.ll +++ b/llvm/test/CodeGen/AArch64/sme-new-za-function.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[SAVE_ZA:%.*]], label [[TMP0:%.*]] ; CHECK: save.za: -; CHECK-NEXT: call void @__arm_tpidr2_save() +; CHECK-NEXT: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() ; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) ; CHECK-NEXT: br label [[TMP0]] ; CHECK: 0: @@ -30,7 +30,7 @@ ; CHECK-NEXT: [[CMP:%.*]] = icmp ne i64 [[TPIDR2]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[SAVE_ZA:%.*]], label 
[[ENTRY:%.*]] ; CHECK: save.za: -; CHECK-NEXT: call void @__arm_tpidr2_save() +; CHECK-NEXT: call aarch64_sme_preservemost_from_x0 void @__arm_tpidr2_save() ; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) ; CHECK-NEXT: br label [[ENTRY]] ; CHECK: entry: @@ -59,4 +59,5 @@ ret i32 %sub } -; CHECK: declare "aarch64_pstate_sm_compatible" void @__arm_tpidr2_save() +; CHECK: declare void @__arm_tpidr2_save() #[[ATTR:[0-9]+]] +; CHECK: attributes #[[ATTR]] = { "aarch64_pstate_sm_compatible" "aarch64_pstate_za_preserved" } diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll @@ -0,0 +1,154 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -stop-after=aarch64-sme-peephole-opt < %s | FileCheck %s + +declare void @normal_callee(); +declare void @streaming_callee() "aarch64_pstate_sm_enabled"; +declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; + +define void @streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_enabled" { + ; CHECK-LABEL: name: streaming_caller_normal_callee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL 
@normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv + ; CHECK-NEXT: RET_ReallyLR + call void @normal_callee(); + call void @normal_callee(); + call void @normal_callee(); + ret void; +} + +define void @locally_streaming_caller_normal_callee() nounwind "aarch64_pstate_sm_body" { + ; CHECK-LABEL: name: locally_streaming_caller_normal_callee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RET_ReallyLR + call void @normal_callee(); + call void @normal_callee(); + call void @normal_callee(); + ret void; +} + +define void @streaming_compatible_caller_normal_callee() nounwind "aarch64_pstate_sm_compatible" { + ; CHECK-LABEL: name: streaming_compatible_caller_normal_callee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; 
CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4096 + ; CHECK-NEXT: MSRpstatePseudo 1, 0, [[ANDXri]], 0, csr_aarch64_smstartstop, implicit-def $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri1:%[0-9]+]]:gpr64common = ANDXri [[COPY1]], 4096 + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri2:%[0-9]+]]:gpr64common = ANDXri [[COPY2]], 4096 + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatePseudo 1, 1, [[ANDXri2]], 0, csr_aarch64_smstartstop + ; CHECK-NEXT: RET_ReallyLR + call void @normal_callee(); + 
call void @normal_callee(); + call void @normal_callee(); + ret void; +} + +define void @normal_caller_streaming_callee() nounwind { + ; CHECK-LABEL: name: normal_caller_streaming_callee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit-def $sp + ; CHECK-NEXT: BL @streaming_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @streaming_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL @streaming_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv + ; CHECK-NEXT: RET_ReallyLR + call void @streaming_callee(); + call void @streaming_callee(); + call void @streaming_callee(); + ret void; +} + +define void @streaming_compatible_caller_mixed_callees() nounwind "aarch64_pstate_sm_compatible" { + ; CHECK-LABEL: name: streaming_compatible_caller_mixed_callees + ; CHECK: bb.0 (%ir-block.0): + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: 
[[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4096 + ; CHECK-NEXT: MSRpstatePseudo 1, 1, [[ANDXri]], 1, csr_aarch64_smstartstop, implicit-def $sp + ; CHECK-NEXT: BL @streaming_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatePseudo 1, 0, [[ANDXri]], 1, csr_aarch64_smstartstop + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri1:%[0-9]+]]:gpr64common = ANDXri [[COPY1]], 4096 + ; CHECK-NEXT: MSRpstatePseudo 1, 0, [[ANDXri1]], 0, csr_aarch64_smstartstop, implicit-def $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatePseudo 1, 1, [[ANDXri1]], 0, csr_aarch64_smstartstop + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri2:%[0-9]+]]:gpr64common = ANDXri [[COPY2]], 4096 + ; CHECK-NEXT: MSRpstatePseudo 1, 1, [[ANDXri2]], 1, csr_aarch64_smstartstop, implicit-def $sp + ; CHECK-NEXT: BL @streaming_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def 
$sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatePseudo 1, 0, [[ANDXri2]], 1, csr_aarch64_smstartstop + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: BL &__arm_sme_state, csr_aarch64_sme_abi_support_routines_preservemost_from_x2, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $x0 + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[ANDXri3:%[0-9]+]]:gpr64common = ANDXri [[COPY3]], 4096 + ; CHECK-NEXT: MSRpstatePseudo 1, 0, [[ANDXri3]], 0, csr_aarch64_smstartstop, implicit-def $sp + ; CHECK-NEXT: BL @normal_callee, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatePseudo 1, 1, [[ANDXri3]], 0, csr_aarch64_smstartstop + ; CHECK-NEXT: RET_ReallyLR + call void @streaming_callee(); + call void @normal_callee(); + call void @streaming_callee(); + call void @normal_callee(); + ret void; +} diff --git a/llvm/test/CodeGen/AArch64/sme-read-write-tpidr2.ll b/llvm/test/CodeGen/AArch64/sme-read-write-tpidr2.ll --- a/llvm/test/CodeGen/AArch64/sme-read-write-tpidr2.ll +++ b/llvm/test/CodeGen/AArch64/sme-read-write-tpidr2.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 -mattr=+sme < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+sme -verify-machineinstrs < %s | FileCheck %s define i64 @get_tpidr2_el0() { ; CHECK-LABEL: get_tpidr2_el0: diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-abi.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-abi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-shared-za-abi.ll @@ -0,0 +1,225 @@ +; RUN: opt -S -mtriple=aarch64-linux-gnu -opaque-pointers 
-aarch64-sme-abi %s | FileCheck %s +; RUN: opt -S -mtriple=aarch64-linux-gnu -opaque-pointers -aarch64-sme-abi -aarch64-sme-abi %s | FileCheck %s +; Check that the test also passes when not in opaque-pointer mode. +; RUN: opt -S -mtriple=aarch64-linux-gnu -aarch64-sme-abi %s -o /dev/null + +; XFAIL: * + +declare void @private_za_callee(); +declare void @shared_za_callee() "aarch64_pstate_za_shared" +declare void @new_za_callee() "aarch64_pstate_za_new" +declare void @preserved_za_callee() "aarch64_pstate_za_preserved" +declare i64 @foo(i64); + +declare float @llvm.cos.f32(float) + +; CHECK: type { ptr, i16, [6 x i8] } + +; Shared ZA Caller, Private ZA Callee + +define void @shared_za_caller() "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_caller() #4 { +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @private_za_callee() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; 
CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; + call void @private_za_callee() + ret void +} + +; Shared ZA Caller, Private ZA Callees + +define i64 @shared_za_caller_multiple_callees(i64 %a) "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_caller_multiple_callees(i64 %a) #4 { +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: %b = call i64 @foo(i64 %a) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %sum = add i64 %a, %b +; CHECK-NEXT: %live3 = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc4 = trunc i64 %live3 to i16 +; CHECK-NEXT: %tpidr2.obj.live5 = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc4, ptr %tpidr2.obj.live5, align 2 +; CHECK-NEXT: %tpi.int6 = 
ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int6) +; CHECK-NEXT: %c = call i64 @foo(i64 %sum) +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr27 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp8 = icmp eq i64 %tpidr27, 0 +; CHECK-NEXT: br i1 %cmp8, label %restore.za2, label %resume1 +; CHECK: restore.za2: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume1 +; CHECK: resume1: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: %res = mul i64 %sum, %c +; CHECK-NEXT: ret i64 %res +; + %b = call i64 @foo(i64 %a) + %sum = add i64 %a, %b + %c = call i64 @foo(i64 %sum) + %res = mul i64 %sum, %c + ret i64 %res +} + +; Shared ZA Caller, New ZA Callee + +define void @shared_za_new_za_callee() "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_new_za_callee() #4 { +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @new_za_callee() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br 
i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; + call void @new_za_callee() + ret void +} + +define void @shared_za_streaming_compatible_caller_private_za_callee() "aarch64_pstate_za_shared" "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_streaming_compatible_caller_private_za_callee() #5 { +; CHECK-NEXT: %tpidr2.call.obj = alloca %tpidr2_ty, align 8 +; CHECK-NEXT: %N = call i64 @llvm.aarch64.sme.cntsb() +; CHECK-NEXT: %NN = mul i64 %N, %N +; CHECK-NEXT: %buffer = alloca i8, i64 %NN, align 16 +; CHECK-NEXT: %tpidr2.call.obj.buffer = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 0 +; CHECK-NEXT: store ptr %buffer, ptr %tpidr2.call.obj.buffer, align 8 +; CHECK-NEXT: %live = call i64 @llvm.aarch64.sme.get.live.za.slices() +; CHECK-NEXT: %live.trunc = trunc i64 %live to i16 +; CHECK-NEXT: %tpidr2.obj.live = getelementptr %tpidr2_ty, ptr %tpidr2.call.obj, i64 0, i32 1 +; CHECK-NEXT: store i16 %live.trunc, ptr %tpidr2.obj.live, align 2 +; CHECK-NEXT: %tpi.int = ptrtoint ptr %tpidr2.call.obj to i64 +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 %tpi.int) +; CHECK-NEXT: call void @private_za_callee() +; CHECK-NEXT: call void @llvm.aarch64.sme.start.pstateza() +; CHECK-NEXT: %tpidr2 = call i64 @llvm.aarch64.sme.get.tpidr2() +; CHECK-NEXT: %cmp = icmp eq i64 %tpidr2, 0 +; CHECK-NEXT: br i1 %cmp, label %restore.za, label %resume +; CHECK: restore.za: +; CHECK-NEXT: call void @llvm.aarch64.sme.tpidr2.restore(ptr %tpidr2.call.obj) +; CHECK-NEXT: br label %resume +; CHECK: resume: +; CHECK-NEXT: call void @llvm.aarch64.sme.set.tpidr2(i64 0) +; CHECK-NEXT: ret void +; + call void @private_za_callee() + ret void +} + +; Shared ZA Caller, Shared ZA Callee (Lazy-save not required) + +define void 
@shared_za_shared_za_callee() "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_shared_za_callee() #0 { +; CHECK-NEXT: call void @shared_za_callee() +; CHECK-NEXT: ret void +; + call void @shared_za_callee() + ret void +} + +; Ensure we also check the attribute on the call itself (not just from the called function) +define void @shared_za_shared_za_callee_from_ptr(ptr %fnptr) "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_shared_za_callee_from_ptr(ptr %fnptr) #0 { +; CHECK-NEXT: call void %fnptr() #0 +; CHECK-NEXT: ret void +; + call void %fnptr() "aarch64_pstate_za_shared" + ret void +} + +; Shared ZA Caller, Preserved ZA Callee (Lazy-save not required) + +define void @shared_za_caller_preserved_za_callee() "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_caller_preserved_za_callee() #0 { +; CHECK-NEXT: call void @preserved_za_callee() +; CHECK-NEXT: ret void +; + call void @preserved_za_callee() + ret void +} + +; Shared ZA Caller with Intrinsic Call (Lazy-save not required) + +define float @shared_za_caller_with_intrinsic(ptr %a) "aarch64_pstate_za_shared" { +; CHECK-LABEL: define {{[^@]+}}@shared_za_caller_with_intrinsic(ptr %a) #0 { +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[A:%.*]]) +; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[A]], align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull [[A]]) +; CHECK-NEXT: ret float [[RES]] + call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %a) + %res = load float, ptr %a + call void @llvm.lifetime.start.p0(i64 -1, ptr nonnull %a) + ret float %res +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +;. 
+; CHECK: attributes #0 = { "aarch64_pstate_za_shared" } +; CHECK: attributes #1 = { "aarch64_pstate_za_new" } +; CHECK: attributes #2 = { "aarch64_pstate_za_preserved" } +; CHECK: attributes #3 = { nocallback nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #4 = { "aarch64_expanded_pstate_za" "aarch64_pstate_za_shared" } +; CHECK: attributes #5 = { "aarch64_expanded_pstate_za" "aarch64_pstate_sm_compatible" "aarch64_pstate_za_shared" } +; CHECK: attributes #6 = { argmemonly nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #7 = { nocallback nofree nosync nounwind readnone willreturn } +; CHECK: attributes #8 = { nocallback nofree nosync nounwind willreturn } +; CHECK: attributes #9 = { inaccessiblememonly nocallback nofree nosync nounwind readonly willreturn } +;. diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll @@ -16,7 +16,7 @@ ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #16 -; CHECK-NEXT: str x9, [x29] +; CHECK-NEXT: stur x9, [x29, #-16] ; CHECK-NEXT: sturh w8, [x29, #-8] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl private_za_callee @@ -39,29 +39,33 @@ define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind { ; CHECK-LABEL: f128_call_za: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: mul x8, x8, x8 +; CHECK-NEXT: mov x19, sp ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: sturh w8, [x29, #-8] -; CHECK-NEXT: str x9, [x29] +; CHECK-NEXT: stur x9, [x29, #-16] ; CHECK-NEXT: msr TPIDR2_EL0, x10 ; CHECK-NEXT: bl __addtf3 ; CHECK-NEXT: smstart za -; CHECK-NEXT: add x0, x29, #0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 ; CHECK-NEXT: cbnz x8, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore ; CHECK-NEXT: .LBB1_2: ; CHECK-NEXT: msr TPIDR2_EL0, xzr +; CHECK-NEXT: mov sp, x19 ; CHECK-NEXT: mov sp, x29 -; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = fadd fp128 %a, %b ret fp128 %res diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -start-after=simplifycfg -enable-tail-merge=false -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s declare void @normal_callee(); declare void @streaming_callee() "aarch64_pstate_sm_enabled"; @@ -263,3 +263,46 @@ } declare double @llvm.cos.f64(double) + + +define float @test_arg_survives_loop(float %arg, i32 %N) nounwind "aarch64_pstate_sm_body" { +; CHECK-LABEL: test_arg_survives_loop: +; CHECK: 
// %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: smstart sm +; CHECK-NEXT: .LBB9_1: // %for.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: subs w0, w0, #1 +; CHECK-NEXT: b.ne .LBB9_1 +; CHECK-NEXT: // %bb.2: // %for.cond.cleanup +; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: ldr s1, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-NEXT: smstop sm +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret +entry: + br label %for.body + +for.body: + %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %inc = add nuw nsw i32 %i.02, 1 + %exitcond.not = icmp eq i32 %inc, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + %add = fadd float %arg, 1.000000e+00 + ret float %add + +} diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s 
| FileCheck %s ; This file tests the following combinations related to streaming-enabled functions: ; [ ] N -> S (Normal -> Streaming) diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-locally-interface.ll copy from llvm/test/CodeGen/AArch64/sme-streaming-body.ll copy to llvm/test/CodeGen/AArch64/sme-streaming-locally-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-locally-interface.ll @@ -3,7 +3,8 @@ declare void @normal_callee(); declare void @streaming_callee() "aarch64_pstate_sm_enabled"; -declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; + +; Locally Streaming Caller, Streaming Callee define void @locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_streaming_callee: @@ -14,8 +15,8 @@ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: bl streaming_compatible_callee -; CHECK-NEXT: bl streaming_compatible_callee +; CHECK-NEXT: bl streaming_callee +; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload @@ -24,21 +25,6 @@ ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret - call void @streaming_compatible_callee(); - call void @streaming_compatible_callee(); - ret void; -} - -; Test that a streaming body and streaming interface, no smstart/smstop are emitted, -; because the function already is in streaming mode upon entry. -define void @streaming_and_locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_enabled" "aarch64_pstate_sm_body" nounwind { -; CHECK-LABEL: streaming_and_locally_streaming_caller_streaming_callee: -; CHECK: // %bb.0: -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: bl streaming_callee -; CHECK-NEXT: bl streaming_callee -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret call void @streaming_callee(); call void @streaming_callee(); ret void; @@ -53,7 +39,7 @@ ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm ; CHECK-NEXT: cmp x0, #1 -; CHECK-NEXT: b.ne .LBB2_2 +; CHECK-NEXT: b.ne .LBB1_2 ; CHECK-NEXT: // %bb.1: // %if.else ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload @@ -61,7 +47,7 @@ ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_2: // %if.end +; CHECK-NEXT: .LBB1_2: // %if.end ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload @@ -94,9 +80,9 @@ ; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: add v0.2d, v1.2d, v0.2d ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm @@ -112,27 +98,39 @@ ret <2 x i64> %add; } -; Test that we use the interface (not the function's body) to determine what -; streaming-mode to enter the callee. In this case the interface is normal, so -; pstate.sm must be 0 on entry and is 0 upon return from the callee. +define void @locally_streaming_caller_normal_callee() "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: locally_streaming_caller_normal_callee: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: bl normal_callee +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + + call void @normal_callee(); + ret void; +} + +define void @streaming_and_locally_streaming_caller_streaming_callee() "aarch64_pstate_sm_enabled" "aarch64_pstate_sm_body" nounwind { +; CHECK-LABEL: streaming_and_locally_streaming_caller_streaming_callee: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: bl streaming_callee +; CHECK-NEXT: bl streaming_callee +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @streaming_callee(); + call void @streaming_callee(); + ret void; +} + +; Locally Streaming Caller, Locally Streaming Callee + define void @locally_streaming_caller_locally_streaming_callee() "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_locally_streaming_callee: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: smstart sm -; CHECK-NEXT: smstop sm +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: bl locally_streaming_caller_streaming_callee -; CHECK-NEXT: smstart sm -; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret call void @locally_streaming_caller_streaming_callee(); @@ -175,32 +173,33 @@ declare <2 x i64> @streaming_compatible_callee_vec_args_ret(<2 x i64>) "aarch64_pstate_sm_compatible" -define {<2 x i64>, <2 x i64>} @locally_streaming_caller_compatible_callee_struct_arg_ret({<2 x i64>, <2 x i64>} %arg) "aarch64_pstate_sm_body" nounwind { +define <2 x i64> @locally_streaming_caller_compatible_callee_struct_arg_ret({<2 x i64>, <2 x i64>} %arg) "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: locally_streaming_caller_compatible_callee_struct_arg_ret: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #112 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #96 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte 
Folded Reload ; CHECK-NEXT: bl streaming_compatible_callee_vec_arg_struct_ret -; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill +; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm -; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #112 +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %v1.arg = extractvalue {<2 x i64>, <2 x i64>} %arg, 1 %res = call {<2 x i64>, <2 x i64>} @streaming_compatible_callee_vec_arg_struct_ret(<2 x i64> %v1.arg) "aarch64_pstate_sm_compatible" - ret {<2 x i64>, <2 x i64>} %res; + %v1.res = extractvalue {<2 x i64>, <2 x i64>} %res, 1 + ret <2 x i64> %v1.res; } declare {<2 x i64>, <2 x i64>} @streaming_compatible_callee_vec_arg_struct_ret(<2 x i64>) "aarch64_pstate_sm_compatible" diff --git a/llvm/test/CodeGen/AArch64/sme-streaming.ll b/llvm/test/CodeGen/AArch64/sme-streaming.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-streaming.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme < %s | FileCheck %s + +; Streaming mode functions can use the full SME instruction set and the subset +; of SVE and NEON instructions that are legal in streaming mode. 
+; + +; CHECK: sclamp z0.b, z0.b, z0.b +define @streaming_compatible_sme( %x) { + %1 = call asm "sclamp $0.b, $0.b, $0.b", "=w,0"( %x) + ret %1 +} + +; CHECK: add z0.b, z0.b, z0.b +define @streaming_compatible_sve( %x) { + %1 = call asm "add $0.b, $0.b, $0.b", "=w,0"( %x) + ret %1 +} + +; CHECK: fmulx s0, s0, s0 +define float @streaming_compatible_neon(float %x) { + %1 = call float asm "fmulx ${0:s}, ${0:s}, ${0:s}", "=w,0"(float %x) + ret float %1 +} diff --git a/llvm/test/CodeGen/AArch64/sme-tailcall.ll b/llvm/test/CodeGen/AArch64/sme-tailcall.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme-tailcall.ll @@ -0,0 +1,194 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -opaque-pointers -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +declare void @normal_callee(); +declare void @streaming_callee() "aarch64_pstate_sm_enabled"; +declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; + + +; Caller is non-streaming mode + +define void @non_streaming_caller_to_streaming_callee() nounwind { +; CHECK-LABEL: non_streaming_caller_to_streaming_callee: +; CHECK: // %bb.0: // %entry +; CHECK: smstart sm +; CHECK: bl streaming_callee +; CHECK: smstop sm +entry: + tail call void @streaming_callee() "aarch64_pstate_sm_enabled" + ret void +} + +define void @non_streaming_caller_to_streaming_compatible_callee() nounwind { +; CHECK-LABEL: non_streaming_caller_to_streaming_compatible_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK: b streaming_compatible_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" + ret void +} + +; Caller is streaming mode + +define void @streaming_caller_to_streaming_callee() nounwind "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: streaming_caller_to_streaming_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK-NEXT: b streaming_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + 
tail call void @streaming_callee() "aarch64_pstate_sm_enabled" + ret void +} + +define void @streaming_caller_to_non_streaming_callee() nounwind "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: streaming_caller_to_non_streaming_callee: +; CHECK: // %bb.0: // %entry +; CHECK: smstop sm +; CHECK: bl normal_callee +; CHECK: smstart sm +entry: + tail call void @normal_callee() + ret void +} + +define void @streaming_caller_to_streaming_compatible_callee() nounwind "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: streaming_caller_to_streaming_compatible_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK: b streaming_compatible_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" + ret void +} + +; Caller is streaming compatible mode + +define void @streaming_compatible_caller_to_streaming_callee() nounwind "aarch64_pstate_sm_compatible"{ +; CHECK-LABEL: streaming_compatible_caller_to_streaming_callee: +; CHECK: // %bb.1: // %entry +; CHECK: smstart sm +; CHECK: bl streaming_callee +; CHECK: smstop sm +entry: + tail call void @streaming_callee() "aarch64_pstate_sm_enabled" + ret void +} + +define void @streaming_compatible_caller_to_non_streaming_callee() nounwind "aarch64_pstate_sm_compatible"{ +; CHECK-LABEL: streaming_compatible_caller_to_non_streaming_callee: +; CHECK: // %bb.1: // %entry +; CHECK: smstop sm +; CHECK: bl normal_callee +; CHECK: smstart sm +entry: + tail call void @normal_callee() + ret void +} + +define void @streaming_compatible_caller_to_streaming_compatible_callee() nounwind "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: streaming_compatible_caller_to_streaming_compatible_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK: b streaming_compatible_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" + ret void +} + +declare void @za_new_callee() 
"aarch64_pstate_za_new"; +declare void @za_shared_callee() "aarch64_pstate_za_shared"; +declare void @za_preserved_callee() "aarch64_pstate_za_preserved"; + +; Caller with ZA state new + +define void @za_new_caller_to_za_new_callee() nounwind "aarch64_pstate_za_new" { +; CHECK-LABEL: za_new_caller_to_za_new_callee: +; CHECK: // %bb.0: // %prelude +; CHECK: bl __arm_tpidr2_save +; CHECK: smstart za +; CHECK: bl za_new_callee +; CHECK: smstart za +; CHECK: bl __arm_tpidr2_restore +; CHECK: smstop za +entry: + tail call void @za_new_callee() "aarch64_pstate_za_new"; + ret void; +} + + +define void @za_new_caller_to_za_shared_callee() nounwind "aarch64_pstate_za_new" { +; CHECK-LABEL: za_new_caller_to_za_shared_callee: +; CHECK: // %bb.0: // %prelude +; CHECK: bl __arm_tpidr2_save +; CHECK: smstart za +; CHECK: bl za_shared_callee +; CHECK: smstop za +entry: + tail call void @za_shared_callee() "aarch64_pstate_za_shared"; + ret void; +} + +define void @za_new_caller_to_za_preserved_callee() nounwind "aarch64_pstate_za_new" { +; CHECK-LABEL: za_new_caller_to_za_preserved_callee: +; CHECK: // %bb.0: // %prelude +; CHECK: bl __arm_tpidr2_save +; CHECK: smstart za +; CHECK: bl za_preserved_callee +; CHECK: smstop za +entry: + tail call void @za_preserved_callee() "aarch64_pstate_za_preserved"; + ret void; +} + + +; Caller with ZA state shared + +define void @za_shared_caller_to_za_new_callee() nounwind "aarch64_pstate_za_shared" { +; CHECK-LABEL: za_shared_caller_to_za_new_callee: +; CHECK: // %bb.0: // %entry +; CHECK: bl za_new_callee +; CHECK-NEXT: smstart za +; CHECK: bl __arm_tpidr2_restore +entry: + tail call void @za_new_callee() "aarch64_pstate_za_new"; + ret void; +} + +define void @za_shared_caller_to_za_shared_callee() nounwind "aarch64_pstate_za_shared" { +; CHECK-LABEL: za_shared_caller_to_za_shared_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK-NEXT: b za_shared_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void 
@za_shared_callee() "aarch64_pstate_za_shared"; + ret void; +} + +define void @za_shared_caller_to_za_preserved_callee() nounwind "aarch64_pstate_za_shared" { +; CHECK-LABEL: za_shared_caller_to_za_preserved_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK-NEXT: b za_preserved_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void @za_preserved_callee() "aarch64_pstate_za_preserved"; + ret void; +} + +; Caller with ZA state preserved + +define void @za_preserved_caller_to_za_preserved_callee() nounwind "aarch64_pstate_za_preserved" { +; CHECK-LABEL: za_preserved_caller_to_za_preserved_callee: +; CHECK: // %bb.0: // %entry +; CHECK-NOT: {{smstart|smstop}} +; CHECK-NEXT: b za_preserved_callee +; CHECK-NOT: {{smstart|smstop}} +entry: + tail call void @za_preserved_callee() "aarch64_pstate_za_preserved"; + ret void; +} diff --git a/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll b/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll --- a/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll +++ b/llvm/test/CodeGen/AArch64/sme-toggle-pstateza.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 -mattr=+sme -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mattr=+sme -enable-aarch64-sme-peephole-opt=false -verify-machineinstrs < %s | FileCheck %s define void @toggle_pstate_za() { ; CHECK-LABEL: toggle_pstate_za: diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-extract-mova.ll @@ -0,0 +1,532 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; +; Move Multi-Vector From Tile (Read) x2 +; + +; Horizontal + +define { , } @za_read_horiz_vg2_b(i32 %slice) { +; 
CHECK-LABEL: za_read_horiz_vg2_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.b, z1.b }, za0h.b[w12, 0:1] +; CHECK-NEXT: mov { z0.b, z1.b }, za0h.b[w12, 14:15] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 %slice) + %slice.14 = add i32 %slice, 14 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32 %slice.14) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_h(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_f16(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_bf16(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0h.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_s(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s, z1.s }, za0h.s[w12, 0:1] +; CHECK-NEXT: mov { z0.s, 
z1.s }, za0h.s[w12, 2:3] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 %slice) + %slice.2 = add i32 %slice, 2 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32 %slice.2) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_f32(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s, z1.s }, za0h.s[w12, 0:1] +; CHECK-NEXT: mov { z0.s, z1.s }, za0h.s[w12, 2:3] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 %slice) + %slice.2 = add i32 %slice, 2 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32 %slice.2) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_d(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +; CHECK-NEXT: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32 %slice.0) + ret { , } %res2 +} + +define { , } @za_read_horiz_vg2_f64(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +; CHECK-NEXT: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32 %slice.0) + ret { , } %res2 +} + +; Vertical + +define { , } @za_read_vert_vg2_b(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.b, z1.b }, za0v.b[w12, 0:1] +; CHECK-NEXT: mov { z0.b, z1.b }, za0v.b[w12, 14:15] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 %slice) + %slice.14 = add i32 %slice, 14 + %res2 = 
call { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32 %slice.14) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_h(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_f16(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_bf16(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 0:1] +; CHECK-NEXT: mov { z0.h, z1.h }, za0v.h[w12, 6:7] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 %slice) + %slice.6 = add i32 %slice, 6 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32 %slice.6) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_s(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s, z1.s }, za0v.s[w12, 0:1] +; CHECK-NEXT: mov { z0.s, z1.s }, za0v.s[w12, 2:3] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 %slice) + %slice.2 = add i32 %slice, 2 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32 %slice.2) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_f32(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_f32: 
+; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s, z1.s }, za0v.s[w12, 0:1] +; CHECK-NEXT: mov { z0.s, z1.s }, za0v.s[w12, 2:3] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 %slice) + %slice.2 = add i32 %slice, 2 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32 %slice.2) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_d(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +; CHECK-NEXT: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32 %slice.0) + ret { , } %res2 +} + +define { , } @za_read_vert_vg2_f64(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +; CHECK-NEXT: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32 %slice.0) + ret { , } %res2 +} + +; +; Move Multi-Vector From Tile (Read) x4 +; + +; Horizontal + +define { , , , } @za_read_horiz_vg4_b(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.b - z3.b }, za0h.b[w12, 0:3] +; CHECK-NEXT: mov { z0.b - z3.b }, za0h.b[w12, 12:15] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 %slice) + %slice.12 = add i32 %slice, 12 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32 %slice.12) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_h(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, 
za0h.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0h.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 %slice) + %slice.4 = add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_f16(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, za0h.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0h.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 %slice) + %slice.4 = add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_bf16(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, za0h.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0h.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 %slice) + %slice.4 = add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_s(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +; CHECK-NEXT: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32 %slice.0) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_f32(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +; CHECK-NEXT: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +; CHECK-NEXT: ret + 
%res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32 %slice.0) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_d(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +; CHECK-NEXT: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32 %slice.0) + ret { , , , } %res2 +} + +define { , , , } @za_read_horiz_vg4_f64(i32 %slice) { +; CHECK-LABEL: za_read_horiz_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +; CHECK-NEXT: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32 %slice.0) + ret { , , , } %res2 +} + +; Vertical + +define { , , , } @za_read_vert_vg4_b(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.b - z3.b }, za0v.b[w12, 0:3] +; CHECK-NEXT: mov { z0.b - z3.b }, za0v.b[w12, 12:15] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 %slice) + %slice.12 = add i32 %slice, 12 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32 %slice.12) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_h(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 %slice) + %slice.4 = 
add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_f16(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 %slice) + %slice.4 = add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_bf16(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 0:3] +; CHECK-NEXT: mov { z0.h - z3.h }, za0v.h[w12, 4:7] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 %slice) + %slice.4 = add i32 %slice, 4 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32 %slice.4) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_s(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +; CHECK-NEXT: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32 %slice.0) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_f32(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +; CHECK-NEXT: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32 %slice.0) + 
ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_d(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +; CHECK-NEXT: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32 %slice.0) + ret { , , , } %res2 +} + +define { , , , } @za_read_vert_vg4_f64(i32 %slice) { +; CHECK-LABEL: za_read_vert_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +; CHECK-NEXT: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 %slice) + %slice.0 = add i32 %slice, 0 + %res2 = call { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32 %slice.0) + ret { , , , } %res2 +} + +; Move Multi-Vector From ZA (Read) x2 + +define { , } @za_read_vg1x2_d(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32 %slice.7) + ret { , } %res2 +} + +define { , } @za_read_vg1x2_f64(i32 %slice) { +; CHECK-LABEL: za_read_vg1x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +; CHECK-NEXT: mov { z0.d, z1.d }, za.d[w8, 7, vgx2] +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32 %slice.7) + ret { , } %res2 +} + +; Move Multi-Vector From ZA (Read) x4 + +define { , , , } @za_read_vg1x4_d(i32 %slice) { +; 
CHECK-LABEL: za_read_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32 %slice.7) + ret { , , , } %res2 +} + +define { , , , } @za_read_vg1x4_f64(i32 %slice) { +; CHECK-LABEL: za_read_vg1x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +; CHECK-NEXT: mov { z0.d - z3.d }, za.d[w8, 7, vgx4] +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice) + %slice.7 = add i32 %slice, 7 + %res2 = call { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32 %slice.7) + ret { , , , } %res2 +} + +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv16i8(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv8i16(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv8f16(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv8bf16(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv4i32(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv4f32(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv2i64(i32) +declare { , } @llvm.aarch64.sme.read.hor.vg2.nxv2f64(i32) + +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv16i8(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8i16(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8f16(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv8bf16(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4i32(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv4f32(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2i64(i32) +declare { , , , } @llvm.aarch64.sme.read.hor.vg4.nxv2f64(i32) + +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv16i8(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv8i16(i32) +declare { , } 
@llvm.aarch64.sme.read.ver.vg2.nxv8f16(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv8bf16(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv4i32(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv4f32(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv2i64(i32) +declare { , } @llvm.aarch64.sme.read.ver.vg2.nxv2f64(i32) + +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv16i8(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8i16(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8f16(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv8bf16(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4i32(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv4f32(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2i64(i32) +declare { , , , } @llvm.aarch64.sme.read.ver.vg4.nxv2f64(i32) + +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv2i64(i32) +declare { , } @llvm.aarch64.sme.read.vg1x2.nxv2f64(i32) + +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2i64(i32) +declare { , , , } @llvm.aarch64.sme.read.vg1x4.nxv2f64(i32) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll @@ -0,0 +1,618 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+sme-f64f64 -verify-machineinstrs | FileCheck %s + +; FMLA (SINGLE) + +define void @multi_vector_add_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s +; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 %slice, + %zn0, %zn1, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm) + ret void +} + +define void @multi_vector_add_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d +; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm) + ret void +} + +; Test to ensure the correct register class is used (first register in the list should be a multiple of 2) +define void @multi_vector_add_single_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg1x2_s_regclass: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z3.s, z4.s }, z2.s +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32 %slice, + %zn1, %zn0, + %zm) + ret void +} + +define void @multi_vector_add_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_single_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s +; 
CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_add_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_single_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d +; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm) + ret void +} + +; Test to ensure the correct register class is used (first register in the list should be a multiple of 4) +define void @multi_vector_add_single_vg1x4_d_regclass(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_single_vg1x4_d_regclass: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z26.d, z3.d +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z25.d, z2.d +; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: mov z27.d, z0.d +; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z24.d - z27.d }, z4.d +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32 %slice, + %zn1, %zn2, + %zn3, %zn0, + %zm) + ret void +} + +; FMLS (SINGLE) + +define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: 
multi_vector_sub_single_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s +; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 %slice, + %zn0, %zn1, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d +; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_single_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s +; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + 
%zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_single_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d +; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm) + ret void +} + +; FMLA (MULTI) + +define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: multi_vector_add_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %zm1, %zm2) { + call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv4f32(i32 %slice, + %zn0, %zn1, + %zm1, %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm1, %zm2) + ret void +} + +define void @multi_vector_add_vg1x2_d(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: 
multi_vector_add_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %zm1, %zm2) { + call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm1, %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm1, %zm2) + ret void +} + +define void @multi_vector_add_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %zm1, %zm2, %zm3, %zm4) { + call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, %zn2, %zn3, + 
%zm1, %zm2, %zm3, %zm4) + ret void +} + +define void @multi_vector_add_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %zm1, %zm2, %zm3, %zm4) { + call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv2f64(i32 %slice.7, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + ret void +} + +; FMLS (MULTI) + +define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: multi_vector_sub_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, { z2.s, z3.s } +; CHECK-NEXT: ret + %zm1, %zm2) { + call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv4f32(i32 
%slice, + %zn0, %zn1, + %zm1, %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm1, %zm2) + ret void +} + +define void @multi_vector_sub_vg1x2_d(i32 %slice, %zn0, %zn1, +; CHECK-LABEL: multi_vector_sub_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, { z2.d, z3.d } +; CHECK-NEXT: ret + %zm1, %zm2) { + call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm1, %zm2) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm1, %zm2) + ret void +} + +define void @multi_vector_sub_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, { z4.s - z7.s } +; CHECK-NEXT: ret + %zm1, %zm2, 
%zm3, %zm4) { + call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + ret void +} + +define void @multi_vector_sub_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, { z4.d - z7.d } +; CHECK-NEXT: ret + %zm1, %zm2, %zm3, %zm4) { + call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv2f64(i32 %slice.7, + %zn0, %zn1, %zn2, %zn3, + %zm1, %zm2, %zm3, %zm4) + ret void +} + +; FMLA (INDEXED) + +define void @multi_vector_add_lane_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] +; CHECK-NEXT: fmla za.s[w8, 
7, vgx2], { z0.s, z1.s }, z2.s[3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice, + %zn0, %zn1, + %zm, i32 3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm, i32 3) + ret void +} + +define void @multi_vector_add_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] +; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm, i32 1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm, i32 1) + ret void +} + +define void @multi_vector_add_lane_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_lane_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] +; CHECK-NEXT: fmla za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 3) + ret void +} + +define void @multi_vector_add_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: 
multi_vector_add_lane_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] +; CHECK-NEXT: fmla za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 1) + ret void +} + +; FMLS (INDEXED) + +define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] +; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 %slice, + %zn0, %zn1, + %zm, i32 3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zm, i32 3) + ret void +} + +define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] +; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] +; CHECK-NEXT: ret + call void 
@llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 %slice, + %zn0, %zn1, + %zm, i32 1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32 %slice.7, + %zn0, %zn1, + %zm, i32 1) + ret void +} + +define void @multi_vector_sub_lane_vg1x4_s(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_lane_vg1x4_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z4.s[3] +; CHECK-NEXT: fmls za.s[w8, 7, vgx4], { z0.s - z3.s }, z4.s[3] +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 3) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32 %slice.7, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 3) + ret void +} + +define void @multi_vector_sub_lane_vg1x4_d(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_lane_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z4.d[1] +; CHECK-NEXT: fmls za.d[w8, 7, vgx4], { z0.d - z3.d }, z4.d[1] +; CHECK-NEXT: ret + %zm) { + call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 %slice, + %zn0, %zn1, + %zn2, %zn3, + %zm, i32 1) + %slice.7 = add i32 %slice, 7 + call void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32 %slice.7, + 
%zn0, %zn1, + %zn2, %zn3, + %zm, i32 1) + ret void +} + +declare void @llvm.aarch64.sme.fmla.single.vg1x2.nxv4f32(i32, , , ) +declare void @llvm.aarch64.sme.fmla.single.vg1x2.nxv2f64(i32, , , ) +declare void @llvm.aarch64.sme.fmla.single.vg1x4.nxv4f32(i32, , , , , ) +declare void @llvm.aarch64.sme.fmla.single.vg1x4.nxv2f64(i32, , , , , ) + +declare void @llvm.aarch64.sme.fmls.single.vg1x2.nxv4f32(i32, , , ) +declare void @llvm.aarch64.sme.fmls.single.vg1x2.nxv2f64(i32, , , ) +declare void @llvm.aarch64.sme.fmls.single.vg1x4.nxv4f32(i32, , , , , ) +declare void @llvm.aarch64.sme.fmls.single.vg1x4.nxv2f64(i32, , , , , ) + +declare void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.fmla.multi.vg1x2.nxv2f64(i32, , , , ) +declare void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv4f32(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.fmla.multi.vg1x4.nxv2f64(i32, , , , , + , , , ) + +declare void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.fmls.multi.vg1x2.nxv2f64(i32, , , , ) +declare void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv4f32(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.fmls.multi.vg1x4.nxv2f64(i32, , , , , + , , , ) + +declare void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv4f32(i32, , , , i32) +declare void @llvm.aarch64.sme.fmla.lane.vg1x2.nxv2f64(i32, , , , i32) +declare void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv4f32(i32, , , , , , i32) +declare void @llvm.aarch64.sme.fmla.lane.vg1x4.nxv2f64(i32, , , , , , i32) + +declare void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv4f32(i32, , , , i32) +declare void @llvm.aarch64.sme.fmls.lane.vg1x2.nxv2f64(i32, , , , i32) +declare void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv4f32(i32, , , , , , i32) +declare void @llvm.aarch64.sme.fmls.lane.vg1x4.nxv2f64(i32, , , , , , i32) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -0,0 +1,632 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s + +; +; Move Multi-Vector To Tile (Write) x 2 +; + +; Horizontal + +define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b } +; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 %slice, %zn1, %zn2) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32 %slice.14, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_h(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0h.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 %slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_f16(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0h.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 
%slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_bf16(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0h.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 %slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_s(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } +; CHECK-NEXT: mov za0h.s[w12, 2:3], { z0.s, z1.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 %slice, %zn1, %zn2) + %slice.2 = add i32 %slice, 2 + call void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32 %slice.2, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_f32(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } +; CHECK-NEXT: mov za0h.s[w12, 2:3], { z0.s, z1.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 %slice, %zn1, %zn2) + %slice.2 = add i32 %slice, 2 + call void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32 %slice.2, %zn1, %zn2) + ret void +} + +define void 
@za_write_vg2_horiz_d(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 %slice, %zn1, %zn2) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32 %slice.0, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_horiz_f64(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_horiz_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 %slice, %zn1, %zn2) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32 %slice.0, %zn1, %zn2) + ret void +} + +; Vertical + +define void @za_write_vg2_vert_b(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_b: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b } +; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 %slice, %zn1, %zn2) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32 %slice.14, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_h(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_h: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def 
$z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0v.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 %slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_f16(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0v.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 %slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_bf16(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } +; CHECK-NEXT: mov za0v.h[w12, 6:7], { z0.h, z1.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 %slice, %zn1, %zn2) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32 %slice.6, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_s(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_s: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } +; CHECK-NEXT: 
mov za0v.s[w12, 2:3], { z0.s, z1.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 %slice, %zn1, %zn2) + %slice.2 = add i32 %slice, 2 + call void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32 %slice.2, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_f32(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } +; CHECK-NEXT: mov za0v.s[w12, 2:3], { z0.s, z1.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 %slice, %zn1, %zn2) + %slice.2 = add i32 %slice, 2 + call void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32 %slice.2, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_d(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 %slice, %zn1, %zn2) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32 %slice.0, %zn1, %zn2) + ret void +} + +define void @za_write_vg2_vert_f64(i32 %slice, %zn1, %zn2) { +; CHECK-LABEL: za_write_vg2_vert_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 %slice, %zn1, %zn2) + %slice.0 = add i32 %slice, 0 + call void 
@llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32 %slice.0, %zn1, %zn2) + ret void +} + +; +; Move Multi-Vector To Tile (Write) x 4 +; + +; Horizontal + +define void @za_write_vg4_horiz_b(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.b[w12, 0:3], { z0.b - z3.b } +; CHECK-NEXT: mov za0h.b[w12, 12:15], { z0.b - z3.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.12 = add i32 %slice, 12 + call void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32 %slice.12, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_h(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_h: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0h.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_f16(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0h.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_bf16(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0h.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_s(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.s[w12, 0:3], { 
z0.s - z3.s } +; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_f32(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: mov za0h.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_horiz_d(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void 
@za_write_vg4_horiz_f64(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_horiz_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: mov za0h.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +; Vertical + +define void @za_write_vg4_vert_b(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_b: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.b[w12, 0:3], { z0.b - z3.b } +; CHECK-NEXT: mov za0v.b[w12, 12:15], { z0.b - z3.b } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.12 = add i32 %slice, 12 + call void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32 %slice.12, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_h(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_h: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // 
kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0v.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_f16(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0v.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_bf16(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.h[w12, 0:3], { z0.h - z3.h } +; CHECK-NEXT: mov za0v.h[w12, 4:7], { z0.h - z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 %slice, %zn1, 
%zn2, %zn3, %zn4) + %slice.4 = add i32 %slice, 4 + call void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32 %slice.4, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_s(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_s: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_f32(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: mov za0v.s[w12, 0:3], { z0.s - z3.s } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_d(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +define void @za_write_vg4_vert_f64(i32 %slice, %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: za_write_vg4_vert_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w12, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: mov za0v.d[w12, 0:3], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 %slice, %zn1, %zn2, %zn3, %zn4) + %slice.0 = add i32 %slice, 0 + call void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32 %slice.0, %zn1, %zn2, %zn3, %zn4) + ret void +} + +; +; Move Multi-Vector To ZA (Write) x2 +; + +define void @za_write_vg1x2_d(i32 %slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_d: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32 %slice, %za1, %za2) + ret void +} + +define void @za_write_vg1x2_f64(i32 
%slice, %za1, %za2) { +; CHECK-LABEL: za_write_vg1x2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32 %slice, %za1, %za2) + ret void +} + +; +; Move Multi-Vector To ZA (Write) x4 +; + +define void @za_write_vg1x4_d(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_d: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv2i64(i32 %slice, %za1, %za2, %za3, %za4) + ret void +} + +define void @za_write_vg1x4_f64(i32 %slice, %za1, %za2, %za3, %za4) { +; CHECK-LABEL: za_write_vg1x4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32 %slice, %za1, %za2, %za3, %za4) + ret void +} + +declare void @llvm.aarch64.sme.write.hor.vg2.nxv16i8(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv8i16(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv8f16(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv8bf16(i32, , ) 
+declare void @llvm.aarch64.sme.write.hor.vg2.nxv4i32(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv4f32(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv2i64(i32, , ) +declare void @llvm.aarch64.sme.write.hor.vg2.nxv2f64(i32, , ) + +declare void @llvm.aarch64.sme.write.ver.vg2.nxv16i8(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv8i16(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv8f16(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv8bf16(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv4i32(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv4f32(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv2i64(i32, , ) +declare void @llvm.aarch64.sme.write.ver.vg2.nxv2f64(i32, , ) + +declare void @llvm.aarch64.sme.write.hor.vg4.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv4i32(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv2i64(i32, , , , ) +declare void @llvm.aarch64.sme.write.hor.vg4.nxv2f64(i32, , , , ) + +declare void @llvm.aarch64.sme.write.ver.vg4.nxv16i8(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv4i32(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv4f32(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv2i64(i32, , , , ) +declare void @llvm.aarch64.sme.write.ver.vg4.nxv2f64(i32, , , , ) + +declare void @llvm.aarch64.sme.write.vg1x2.nxv2i64(i32, , ) +declare void @llvm.aarch64.sme.write.vg1x2.nxv2f64(i32, , ) + +declare void 
@llvm.aarch64.sme.write.vg1x4.nxv2i64(i32, , , , ) +declare void @llvm.aarch64.sme.write.vg1x4.nxv2f64(i32, , , , ) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll @@ -0,0 +1,1322 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+bf16 -verify-machineinstrs < %s | FileCheck %s + +; +; BF/F/S/UMLAL x1 (SINGLE) +; + +define void @multi_vector_add_single_vg2x1_bf16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: bfmlal za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: bfmlal za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_add_single_vg2x1_f16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: fmlal za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: fmlal za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_add_single_vg2x1_s16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x1_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smlal za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: smlal za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void 
@llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_add_single_vg2x1_u16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x1_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: umlal za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: umlal za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm) + ret void +} + +; +; BF/F/S/UMLSL x1 (SINGLE) +; + +define void @multi_vector_sub_single_vg2x1_bf16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: bfmlsl za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x1_f16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: fmlsl za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: fmlsl za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x1_s16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x1_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smlsl za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: smlsl za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 %slice, %zn, %zm) + %slice.14 = add i32 
%slice, 14 + call void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x1_u16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x1_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: umlsl za.s[w8, 0:1], z0.h, z1.h +; CHECK-NEXT: umlsl za.s[w8, 14:15], z0.h, z1.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 %slice, %zn, %zm) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm) + ret void +} + +; +; BF/F/S/UMLAL x2 (SINGLE) +; + +define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +define void @multi_vector_add_single_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} 
+ +define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +; +; BF/F/S/UMLSL x2 (SINGLE) +; + +define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void 
@llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, %zm) + 
%slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, %zm) + ret void +} + +; +; BF/F/S/UMLAL x4 (SINGLE) +; + +define void @multi_vector_add_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_add_single_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn2, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn2, + %zm) + ret void +} + +define void @multi_vector_add_single_vg2x4_s16(i32 
%slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_add_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_single_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +; +; BF/F/S/UMLSL x4 (SINGLE) +; + +define void @multi_vector_sub_single_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 
killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: 
def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +define void @multi_vector_sub_single_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_single_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm) + ret void +} + +; +; BF/F/S/UMLAL x2 (MULTI) +; + +define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { 
z2.h, z3.h } +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8bf16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8bf16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8f16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8f16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.multi.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void 
@llvm.aarch64.sme.smlal.multi.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.multi.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.multi.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +; +; BF/F/S/UMLSL x2 (MULTI) +; + +define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8bf16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8bf16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16: +; CHECK: // 
%bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8f16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8f16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.multi.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlsl.multi.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) { +; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed 
$z0_z1 def $z0_z1 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, { z2.h, z3.h } +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.multi.vg2x2.nxv8i16(i32 %slice, %zn0, %zn1, + %zm0, %zm1) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.multi.vg2x2.nxv8i16(i32 %slice.6, %zn0, %zn1, + %zm0, %zm1) + ret void +} + +; +; BF/F/S/UMLAL x4 (MULTI) +; + +define void @multi_vector_add_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_multi_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_add_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_multi_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 
+; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_add_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_multi_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: smlal za.s[w8, 
6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.smlal.multi.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlal.multi.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_add_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_add_multi_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.umlal.multi.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.multi.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +; +; BF/F/S/UMLSL x4 (MULTI) +; + +define void @multi_vector_sub_multi_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_multi_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 
killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_sub_multi_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_multi_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h 
} +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_sub_multi_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_multi_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.smlsl.multi.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlsl.multi.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +define void @multi_vector_sub_multi_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, +; CHECK-LABEL: multi_vector_sub_multi_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z7 killed $z7 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // 
kill: def $z6 killed $z6 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z5 killed $z5 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z4 killed $z4 killed $z4_z5_z6_z7 def $z4_z5_z6_z7 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, { z4.h - z7.h } +; CHECK-NEXT: ret + %zm0, %zm1, %zm2, %zm3) { + call void @llvm.aarch64.sme.umlsl.multi.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.multi.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm0, %zm1, %zm2, %zm3) + ret void +} + +; +; BF/F/S/UMLAL x1 (INDEXED) +; + +define void @multi_vector_add_lane_vg2x1_f16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: fmlal za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: fmlal za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x1_bf16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: bfmlal za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: bfmlal za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32 %slice.14, %zn, %zm, i32 7) + ret void 
+} + +define void @multi_vector_add_lane_vg2x1_s16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x1_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smlal za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: smlal za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x1_u16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x1_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: umlal za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: umlal za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +; +; BF/F/S/UMLSL x1 (INDEXED) +; + +define void @multi_vector_sub_lane_vg2x1_f16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x1_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: fmlsl za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: fmlsl za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x1_bf16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x1_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: bfmlsl za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void 
@llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x1_s16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x1_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: smlsl za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: smlsl za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x1_u16(i32 %slice, %zn, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x1_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: umlsl za.s[w8, 0:1], z0.h, z1.h[0] +; CHECK-NEXT: umlsl za.s[w8, 14:15], z0.h, z1.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 %slice, %zn, %zm, i32 0) + %slice.14 = add i32 %slice, 14 + call void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32 %slice.14, %zn, %zm, i32 7) + ret void +} + +; +; BF/F/S/UMLAL x2 (INDEXED) +; + +define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: 
mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +; +; BF/F/S/UMLSL x2 (INDEXED) +; + +define void 
@multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void 
@llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0] +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 %slice, + %zn0, %zn1, %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zm, i32 7) + ret void +} + +; +; BF/F/S/UMLAL x4 (INDEXED) +; + +define void @multi_vector_add_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; 
CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: smlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_add_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_add_lane_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed 
$z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlal za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: umlal za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +; +; BF/F/S/UMLSL x4 (INDEXED) +; + +define void @multi_vector_sub_lane_vg2x4_f16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x4_bf16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; 
CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x4_s16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +define void @multi_vector_sub_lane_vg2x4_u16(i32 %slice, %zn0, %zn1, %zn2, %zn3, %zm) { +; CHECK-LABEL: multi_vector_sub_lane_vg2x4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx4], { z0.h - z3.h }, z4.h[0] +; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx4], { z0.h - z3.h }, z4.h[7] +; CHECK-NEXT: ret + 
call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 %slice, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 0) + %slice.6 = add i32 %slice, 6 + call void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32 %slice.6, + %zn0, %zn1, %zn2, %zn3, + %zm, i32 7) + ret void +} + +declare void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8bf16(i32, , ) +declare void @llvm.aarch64.sme.fmlal.single.vg2x1.nxv8f16(i32, , ) +declare void @llvm.aarch64.sme.smlal.single.vg2x1.nxv8i16(i32, , ) +declare void @llvm.aarch64.sme.umlal.single.vg2x1.nxv8i16(i32, , ) + +declare void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8bf16(i32, , ) +declare void @llvm.aarch64.sme.fmlsl.single.vg2x1.nxv8f16(i32, , ) +declare void @llvm.aarch64.sme.smlsl.single.vg2x1.nxv8i16(i32, , ) +declare void @llvm.aarch64.sme.umlsl.single.vg2x1.nxv8i16(i32, , ) + +declare void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8bf16(i32, , , ) +declare void @llvm.aarch64.sme.fmlal.single.vg2x2.nxv8f16(i32, , , ) +declare void @llvm.aarch64.sme.smlal.single.vg2x2.nxv8i16(i32, , , ) +declare void @llvm.aarch64.sme.umlal.single.vg2x2.nxv8i16(i32, , , ) + +declare void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8bf16(i32, , , ) +declare void @llvm.aarch64.sme.fmlsl.single.vg2x2.nxv8f16(i32, , , ) +declare void @llvm.aarch64.sme.smlsl.single.vg2x2.nxv8i16(i32, , , ) +declare void @llvm.aarch64.sme.umlsl.single.vg2x2.nxv8i16(i32, , , ) + +declare void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8bf16(i32, , , + , , ) +declare void @llvm.aarch64.sme.fmlal.single.vg2x4.nxv8f16(i32, , , + , , ) +declare void @llvm.aarch64.sme.smlal.single.vg2x4.nxv8i16(i32, , , + , , ) +declare void @llvm.aarch64.sme.umlal.single.vg2x4.nxv8i16(i32, , , + , , ) + +declare void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8bf16(i32, , , + , , ) +declare void @llvm.aarch64.sme.fmlsl.single.vg2x4.nxv8f16(i32, , , + , , ) +declare void @llvm.aarch64.sme.smlsl.single.vg2x4.nxv8i16(i32, , , + , , ) +declare void @llvm.aarch64.sme.umlsl.single.vg2x4.nxv8i16(i32, , , + , , ) + +declare 
void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.fmlal.multi.vg2x2.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.smlal.multi.vg2x2.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.umlal.multi.vg2x2.nxv8i16(i32, , , , ) + +declare void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8bf16(i32, , , , ) +declare void @llvm.aarch64.sme.fmlsl.multi.vg2x2.nxv8f16(i32, , , , ) +declare void @llvm.aarch64.sme.smlsl.multi.vg2x2.nxv8i16(i32, , , , ) +declare void @llvm.aarch64.sme.umlsl.multi.vg2x2.nxv8i16(i32, , , , ) + +declare void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8bf16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.fmlal.multi.vg2x4.nxv8f16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.smlal.multi.vg2x4.nxv8i16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.umlal.multi.vg2x4.nxv8i16(i32, , , , , + , , , ) + +declare void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8bf16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.fmlsl.multi.vg2x4.nxv8f16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.smlsl.multi.vg2x4.nxv8i16(i32, , , , , + , , , ) +declare void @llvm.aarch64.sme.umlsl.multi.vg2x4.nxv8i16(i32, , , , , + , , , ) + +declare void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8bf16(i32, , , i32) +declare void @llvm.aarch64.sme.fmlal.lane.vg2x1.nxv8f16(i32, , , i32) +declare void @llvm.aarch64.sme.smlal.lane.vg2x1.nxv8i16(i32, , , i32) +declare void @llvm.aarch64.sme.umlal.lane.vg2x1.nxv8i16(i32, , , i32) + +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8bf16(i32, , , i32) +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x1.nxv8f16(i32, , , i32) +declare void @llvm.aarch64.sme.smlsl.lane.vg2x1.nxv8i16(i32, , , i32) +declare void @llvm.aarch64.sme.umlsl.lane.vg2x1.nxv8i16(i32, , , i32) + +declare void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8bf16(i32, , , , i32) +declare void @llvm.aarch64.sme.fmlal.lane.vg2x2.nxv8f16(i32, , , , i32) +declare void 
@llvm.aarch64.sme.smlal.lane.vg2x2.nxv8i16(i32, , , , i32) +declare void @llvm.aarch64.sme.umlal.lane.vg2x2.nxv8i16(i32, , , , i32) + +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8bf16(i32, , , , i32) +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x2.nxv8f16(i32, , , , i32) +declare void @llvm.aarch64.sme.smlsl.lane.vg2x2.nxv8i16(i32, , , , i32) +declare void @llvm.aarch64.sme.umlsl.lane.vg2x2.nxv8i16(i32, , , , i32) + +declare void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8bf16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.fmlal.lane.vg2x4.nxv8f16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.smlal.lane.vg2x4.nxv8i16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.umlal.lane.vg2x4.nxv8i16(i32, , , , , , i32) + +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8bf16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.fmlsl.lane.vg2x4.nxv8f16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.smlsl.lane.vg2x4.nxv8i16(i32, , , , , , i32) +declare void @llvm.aarch64.sme.umlsl.lane.vg2x4.nxv8i16(i32, , , , , , i32) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-qrshr.ll @@ -0,0 +1,237 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -mattr=+sme2 -mattr=+bf16 -verify-machineinstrs < %s | FileCheck %s + +; +; S/UQRSHR x2 +; + +define @multi_vector_sat_shift_narrow_vgx2_s16( %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_vgx2_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sqrshr z0.h, { z0.s, z1.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.vgx2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +define @multi_vector_sat_shift_narrow_vgx2_u16( %zn1, %zn2) { +; 
CHECK-LABEL: multi_vector_sat_shift_narrow_vgx2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: uqrshr z0.h, { z0.s, z1.s }, #16 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.vgx2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +; +; S/UQRSHR x4 +; + +define @multi_vector_sat_shift_narrow_vgx4_s8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_vgx4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshr z0.b, { z0.s - z3.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_vgx4_s16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_vgx4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshr z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshr.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +define @multi_vector_sat_shift_narrow_vgx4_u8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_vgx4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: 
def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uqrshr z0.b, { z0.s - z3.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_vgx4_u16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_vgx4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uqrshr z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshr.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; S/UQRSHRN x4 + +define @multi_vector_sat_shift_narrow_interleave_vgx4_s8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_vgx4_s8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshrn z0.b, { z0.s - z3.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_vgx4_s16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_vgx4_s16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed 
$z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshrn z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrn.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_vgx4_u8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_vgx4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uqrshrn z0.b, { z0.s - z3.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_narrow_interleave_vgx4_u16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_narrow_interleave_vgx4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: uqrshrn z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.uqrshrn.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; SQRSHRU x2 + +define @multi_vector_sat_shift_unsigned_narrow_vgx2_u16( %zn1, %zn2) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_vgx2_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: sqrshru z0.h, { z0.s, z1.s }, #16 +; CHECK-NEXT: ret + 
%res = call @llvm.aarch64.sve.sqrshru.vgx2.nxv8i16( %zn1, %zn2, i32 16) + ret %res +} + +; SQRSHRU x4 + +define @multi_vector_sat_shift_unsigned_narrow_vgx4_u8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_vgx4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshru z0.b, { z0.s - z3.s }, #32 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshru.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_unsigned_narrow_vgx4_u16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_vgx4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshru z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshru.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +; SQRSHRUN x4 + +define @multi_vector_sat_shift_unsigned_narrow_interleave_vgx4_u8( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_vgx4_u8: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshrun z0.b, { z0.s - z3.s }, #32 
+; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrun.vgx4.nxv16i8( %zn1, %zn2, %zn3, %zn4, i32 32) + ret %res +} + +define @multi_vector_sat_shift_unsigned_narrow_interleave_vgx4_u16( %zn1, %zn2, %zn3, %zn4) { +; CHECK-LABEL: multi_vector_sat_shift_unsigned_narrow_interleave_vgx4_u16: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: sqrshrun z0.h, { z0.d - z3.d }, #64 +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.sqrshrun.vgx4.nxv8i16( %zn1, %zn2, %zn3, %zn4, i32 64) + ret %res +} + +declare @llvm.aarch64.sve.sqrshr.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshr.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshr.vgx4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.uqrshr.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.uqrshr.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.uqrshr.vgx4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshrn.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshrn.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshrn.vgx4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.uqrshrn.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.uqrshrn.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.uqrshrn.vgx4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshru.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshru.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshru.vgx4.nxv8i16(, , , , i32) + +declare @llvm.aarch64.sve.sqrshrun.vgx2.nxv8i16(, , i32) + +declare @llvm.aarch64.sve.sqrshrun.vgx4.nxv16i8(, , , , i32) +declare @llvm.aarch64.sve.sqrshrun.vgx4.nxv8i16(, , , , i32) diff --git a/llvm/test/CodeGen/AArch64/streaming-mode-no-reorder.ll 
b/llvm/test/CodeGen/AArch64/streaming-mode-no-reorder.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/streaming-mode-no-reorder.ll @@ -0,0 +1,20 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -verify-machineinstrs < %s | FileCheck %s + +; Test SMSTART and SMSTOP are marked as scheduling barriers and are not +; rescheduled around function calls. + +declare void @streaming_callee() "aarch64_pstate_sm_enabled"; + +@arr = global [8 x i8] zeroinitializer, align 2 + +define void @clear_arr() { +; CHECK-LABEL: clear_arr: +; CHECK: smstart sm +; CHECK-NEXT: bl streaming_callee +; CHECK-NEXT: smstop sm + call void @streaming_callee() + call void @llvm.memset.p0i8.i64(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @arr, i64 0, i64 0), i8 0, i64 8, i1 false) + ret void; +} + +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-loads.ll @@ -0,0 +1,647 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +; == Normal Multi-Vector Consecutive Loads == + +define { , } @ld1_vg2_i8(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z0.b, z1.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_i16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_i32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_i64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_f16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_bf16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8bf16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_f32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ld1_vg2_f64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ld1_vg2_i8_z0_taken(aarch64_svcount %pn, ptr %ptr, %val) { +; CHECK-LABEL: ld1_vg2_i8_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z2.b, z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.b, z0.b, z2.b +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount %pn, ptr %ptr); + %ld1_0 = extractvalue { , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +define { , , , } @ld1_vg4_i8(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1b { z0.b - z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_i16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_i32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_i64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_f16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_bf16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8bf16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_f32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ld1_vg4_f64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ld1_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ld1_vg4_i16_z0_taken(aarch64_svcount %pn, ptr %ptr, %val) { +; CHECK-LABEL: ld1_vg4_i16_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ld1h { z4.h - z7.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.h, z0.h, z4.h +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount %pn, ptr %ptr); + %ld1_0 = extractvalue { , , , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + + +; == Non-temporal Multi-Vector Consecutive Loads == + +define { , } @ldnt1_vg2_i8(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1b { z0.b, z1.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_i16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_i32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_i64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_f16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_bf16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8bf16(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_f32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s, z1.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +define { , } @ldnt1_vg2_f64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d, z1.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount %pn, ptr %ptr); + ret { , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ldnt1_vg2_i32_z0_taken(aarch64_svcount %pn, ptr %ptr, %val) { +; CHECK-LABEL: ldnt1_vg2_i32_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z2.s, z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.s, z0.s, z2.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount %pn, ptr %ptr); + %ld1_0 = extractvalue { , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +define { , , , } @ldnt1_vg4_i8(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1b { z0.b - z3.b }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_i16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_i32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_i64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_f16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_bf16(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8bf16(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_f32(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1w { z0.s - z3.s }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +define { , , , } @ldnt1_vg4_f64(aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: ldnt1_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z0.d - z3.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount %pn, ptr %ptr); + ret { , , , } %res +} + +; Test to ensure we load into the correct registers for the instruction +define @ldnt1_vg4_i64_z0_taken(aarch64_svcount %pn, ptr %ptr, %val) { +; CHECK-LABEL: ldnt1_vg4_i64_z0_taken: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: ldnt1d { z4.d - z7.d }, pn8/z, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: add z0.d, z0.d, z4.d +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %ld1 = call { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount %pn, ptr %ptr); + %ld1_0 = extractvalue { , , , } %ld1, 0 + %res = add %val, %ld1_0 + ret %res +} + +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2i64(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4i32(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8i16(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv16i8(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv2f64(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv4f32(aarch64_svcount, ptr) +declare { , } 
@llvm.aarch64.sve.ld1.pn.vg2.nxv8f16(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ld1.pn.vg2.nxv8bf16(aarch64_svcount, ptr) + +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2i64(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4i32(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8i16(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv16i8(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv2f64(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv4f32(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8f16(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ld1.pn.vg4.nxv8bf16(aarch64_svcount, ptr) + +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2i64(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4i32(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8i16(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv16i8(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv2f64(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv4f32(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8f16(aarch64_svcount, ptr) +declare { , } @llvm.aarch64.sve.ldnt1.pn.vg2.nxv8bf16(aarch64_svcount, ptr) + +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2i64(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4i32(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8i16(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv16i8(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv2f64(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv4f32(aarch64_svcount, ptr) +declare { , , , } @llvm.aarch64.sve.ldnt1.pn.vg4.nxv8f16(aarch64_svcount, ptr) +declare { , , , } 
@llvm.aarch64.sve.ldnt1.pn.vg4.nxv8bf16(aarch64_svcount, ptr) diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-predicate-as-counter.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 -mattr=+sve2p1 < %s | FileCheck %s + +define @pext_b(aarch64_svcount %x) nounwind { +; CHECK-LABEL: pext_b: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.b, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount %x, i32 2) + ret %res +} + +define @pext_h(aarch64_svcount %x) nounwind { +; CHECK-LABEL: pext_h: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.h, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount %x, i32 2) + ret %res +} + +define @pext_s(aarch64_svcount %x) nounwind { +; CHECK-LABEL: pext_s: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.s, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount %x, i32 2) + ret %res +} + +define @pext_d(aarch64_svcount %x) nounwind { +; CHECK-LABEL: pext_d: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: pext p0.d, pn8[2] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + %res = call @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount %x, i32 2) + ret %res +} + +declare @llvm.aarch64.sve.pext.nxv16i1(aarch64_svcount, i32) +declare @llvm.aarch64.sve.pext.nxv8i1(aarch64_svcount, i32) +declare @llvm.aarch64.sve.pext.nxv4i1(aarch64_svcount, i32) +declare @llvm.aarch64.sve.pext.nxv2i1(aarch64_svcount, i32) + +define aarch64_svcount @ptrue_b() nounwind { +; CHECK-LABEL: ptrue_b: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue pn8.b +; CHECK-NEXT: mov p0.b, p8.b +; CHECK-NEXT: ret + %res = call aarch64_svcount @llvm.aarch64.sve.ptrue.c8() + ret aarch64_svcount %res +} + +define aarch64_svcount @ptrue_h() nounwind { +; CHECK-LABEL: ptrue_h: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue pn8.h +; CHECK-NEXT: mov p0.b, p8.b +; CHECK-NEXT: ret + %res = call aarch64_svcount @llvm.aarch64.sve.ptrue.c16() + ret aarch64_svcount %res +} + +define aarch64_svcount @ptrue_s() nounwind { +; CHECK-LABEL: ptrue_s: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue pn8.s +; CHECK-NEXT: mov p0.b, p8.b +; CHECK-NEXT: ret + %res = call aarch64_svcount 
@llvm.aarch64.sve.ptrue.c32() + ret aarch64_svcount %res +} + +define aarch64_svcount @ptrue_d() nounwind { +; CHECK-LABEL: ptrue_d: +; CHECK: // %bb.0: +; CHECK-NEXT: ptrue pn8.d +; CHECK-NEXT: mov p0.b, p8.b +; CHECK-NEXT: ret + %res = call aarch64_svcount @llvm.aarch64.sve.ptrue.c64() + ret aarch64_svcount %res +} + +declare aarch64_svcount @llvm.aarch64.sve.ptrue.c8() +declare aarch64_svcount @llvm.aarch64.sve.ptrue.c16() +declare aarch64_svcount @llvm.aarch64.sve.ptrue.c32() +declare aarch64_svcount @llvm.aarch64.sve.ptrue.c64() diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll @@ -0,0 +1,731 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s + +; == Normal Multi-Vector Consecutive Stores == + +define void @st1_vg2_i8( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1b { z0.b, z1.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_i16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_i32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1w { z0.s, z1.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_i64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1d { z0.d, z1.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_f16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_bf16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv8bf16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_f32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1w { z0.s, z1.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg2_f64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: st1d { z0.d, z1.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv2f64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +; Test to show we choose the correct registers for the instruction +define void @st1_vg2_i16_swap_regs( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg2_i16_swap_regs: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16( %zn1, %zn0, aarch64_svcount %pn, ptr %ptr); + ret void +} + + +define void @st1_vg4_i8( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1b { z0.b - z3.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv16i8( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_i16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_i32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1w { z0.s - z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_i64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1d { z0.d - z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_f16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_bf16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv8bf16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_f32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1w { z0.s - z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @st1_vg4_f64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: st1_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: st1d { z0.d - z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +; Test to show we choose the correct registers for the instruction +define void @st1_vg4_i64_invert_regs( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: 
st1_vg4_i64_invert_regs: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64( %zn3, %zn2, %zn1, %zn0, aarch64_svcount %pn, ptr %ptr); + ret void +} + +; == Non-temporal Multi-Vector Consecutive Stores == + +define void @stnt1_vg2_i8( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1b { z0.b, z1.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_i16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]!
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_i32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1w { z0.s, z1.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_i64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1d { z0.d, z1.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_f16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_bf16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1h { z0.h, z1.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8bf16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_f32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1w { z0.s, z1.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg2_f64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: stnt1d { z0.d, z1.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +; Test to show we choose the correct registers for the instruction +define void @stnt1_vg2_i16_bad_reg_offset( %ignored, %zn0, %zn1, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg2_i16_bad_reg_offset: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z3.d, z2.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16( %zn0, %zn1, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_i8( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1b { z0.b - z3.b }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_i16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_i32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1w { z0.s - z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_i64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1d { z0.d - z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_f16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_f16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_bf16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_bf16: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1h { z0.h - z3.h }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8bf16( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_f32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_f32: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1w { z0.s - z3.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + +define void @stnt1_vg4_f64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; CHECK-LABEL: stnt1_vg4_f64: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: stnt1d { z0.d - z3.d }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr); + ret void +} + + +; Test to show we choose the correct registers for the instruction +define void @stnt1_vg4_i32_rotated_regs( %zn0, %zn1, %zn2, %zn3, aarch64_svcount %pn, ptr %ptr) nounwind { +; 
CHECK-LABEL: stnt1_vg4_i32_rotated_regs: +; CHECK: // %bb.0: +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov z7.d, z0.d +; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] +; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: ret + call void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32( %zn1, %zn2, %zn3, %zn0, aarch64_svcount %pn, ptr %ptr); + ret void +} + +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv16i8(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv8i16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv4i32(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv2i64(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv8f16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv8bf16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv4f32(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg2.nxv2f64(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv16i8(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8i16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4i32(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2i64(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8f16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv8bf16(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv4f32(, , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg2.nxv2f64(, , aarch64_svcount, ptr) + + +declare void 
@llvm.aarch64.sve.st1.pn.vg4.nxv16i8(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv8i16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv4i32(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv2i64(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv8f16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv8bf16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv4f32(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.st1.pn.vg4.nxv2f64(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv16i8(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8i16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4i32(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2i64(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8f16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv8bf16(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv4f32(, , , , aarch64_svcount, ptr) +declare void @llvm.aarch64.sve.stnt1.pn.vg4.nxv2f64(, , , , aarch64_svcount, ptr) diff --git a/llvm/test/MC/AArch64/SME/smopa-diagnostics.s b/llvm/test/MC/AArch64/SME/smopa-diagnostics.s --- a/llvm/test/MC/AArch64/SME/smopa-diagnostics.s +++ b/llvm/test/MC/AArch64/SME/smopa-diagnostics.s @@ -67,7 +67,7 @@ // expected: .s => .b, .d => .h smopa za0.s, p0/m, p0/m, z0.h, z0.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: smopa za0.s, p0/m, p0/m, z0.h, z0.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/smops-diagnostics.s b/llvm/test/MC/AArch64/SME/smops-diagnostics.s --- a/llvm/test/MC/AArch64/SME/smops-diagnostics.s +++ 
b/llvm/test/MC/AArch64/SME/smops-diagnostics.s @@ -67,7 +67,7 @@ // expected: .s => .b, .d => .h smops za0.s, p0/m, p0/m, z0.h, z0.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: smops za0.s, p0/m, p0/m, z0.h, z0.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/umopa-diagnostics.s b/llvm/test/MC/AArch64/SME/umopa-diagnostics.s --- a/llvm/test/MC/AArch64/SME/umopa-diagnostics.s +++ b/llvm/test/MC/AArch64/SME/umopa-diagnostics.s @@ -67,7 +67,7 @@ // expected: .s => .b, .d => .h umopa za0.s, p0/m, p0/m, z0.h, z0.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: umopa za0.s, p0/m, p0/m, z0.h, z0.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME/umops-diagnostics.s b/llvm/test/MC/AArch64/SME/umops-diagnostics.s --- a/llvm/test/MC/AArch64/SME/umops-diagnostics.s +++ b/llvm/test/MC/AArch64/SME/umops-diagnostics.s @@ -67,7 +67,7 @@ // expected: .s => .b, .d => .h umops za0.s, p0/m, p0/m, z0.h, z0.b -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: umops za0.s, p0/m, p0/m, z0.h, z0.b // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/add-diagnostics.s b/llvm/test/MC/AArch64/SME2/add-diagnostics.s --- a/llvm/test/MC/AArch64/SME2/add-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/add-diagnostics.s @@ -82,7 +82,7 @@ // Invalid vector list. 
add za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: add za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: @@ -120,7 +120,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: add {z0.s,z1.s}, {z0.s,z2.s}, z15.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: add {z0.s,z1.s}, {z0.s,z2.s}, z15.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/bfdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfdot-diagnostics.s @@ -0,0 +1,79 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +bfdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w8, 0, vgx4], {z1.h-z5.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: bfdot za.s[w8, 0, vgx4], {z1.h-z5.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w8, 0, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: bfdot za.s[w8, 0, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single vector register + +bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +bfdot za.s[w7, 0, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: bfdot za.s[w7, 0, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: bfdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +bfdot za.s[w8, -1, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: bfdot za.s[w8, -1, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w8, 8, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: bfdot za.s[w8, 8, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w8, -1, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. 
+// CHECK-NEXT: bfdot za.s[w8, -1, vgx2], {z0.h-z1.h}, {z3.h-z4.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +bfdot za.h[w8, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: bfdot za.h[w8, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: bfdot za.s[w8, 0, vgx4], {z0.h-z3.h}, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/bfdot.s b/llvm/test/MC/AArch64/SME2/bfdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfdot.s @@ -0,0 +1,883 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST + + +bfdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-00100000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x10,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201010 + +bfdot za.s[w8, 0], {z0.h, z1.h}, z0.h // 11000001-00100000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x10,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201010 + +bfdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-00100101-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x55,0x51,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255155 + +bfdot za.s[w10, 5], {z10.h, z11.h}, z5.h // 11000001-00100101-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x55,0x51,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255155 + +bfdot za.s[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-00101000-01110001-10110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb7,0x71,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12871b7 + +bfdot za.s[w11, 7], {z13.h, z14.h}, z8.h // 11000001-00101000-01110001-10110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb7,0x71,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12871b7 + +bfdot za.s[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-00101111-01110011-11110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf7,0x73,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f73f7 + +bfdot za.s[w11, 7], {z31.h, z0.h}, z15.h // 11000001-00101111-01110011-11110111 +// CHECK-INST: bfdot 
za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf7,0x73,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f73f7 + +bfdot za.s[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-00100000-00010010-00110101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x35,0x12,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201235 + +bfdot za.s[w8, 5], {z17.h, z18.h}, z0.h // 11000001-00100000-00010010-00110101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x35,0x12,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201235 + +bfdot za.s[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-00101110-00010000-00110001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x10,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1031 + +bfdot za.s[w8, 1], {z1.h, z2.h}, z14.h // 11000001-00101110-00010000-00110001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x10,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1031 + +bfdot za.s[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-00100100-01010010-01110000 +// CHECK-INST: bfdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x52,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245270 + +bfdot za.s[w10, 0], {z19.h, z20.h}, z4.h // 11000001-00100100-01010010-01110000 +// CHECK-INST: bfdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x52,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245270 + +bfdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-00100010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x11,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1221190 + +bfdot za.s[w8, 0], {z12.h, z13.h}, z2.h // 11000001-00100010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x11,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221190 + +bfdot za.s[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-00101010-01010000-00110001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x50,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5031 + +bfdot za.s[w10, 1], {z1.h, z2.h}, z10.h // 11000001-00101010-01010000-00110001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x50,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5031 + +bfdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-00101110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd5,0x12,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e12d5 + +bfdot za.s[w8, 5], {z22.h, z23.h}, z14.h // 11000001-00101110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd5,0x12,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e12d5 + +bfdot za.s[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-00100001-01110001-00110010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x32,0x71,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217132 + +bfdot za.s[w11, 2], {z9.h, z10.h}, z1.h // 11000001-00100001-01110001-00110010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x32,0x71,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217132 + +bfdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-00101011-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, 
z11.h +// CHECK-ENCODING: [0x97,0x31,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3197 + +bfdot za.s[w9, 7], {z12.h, z13.h}, z11.h // 11000001-00101011-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x97,0x31,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3197 + + +bfdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501018 + +bfdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501018 + +bfdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01011101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155555d + +bfdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01011101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155555d + +bfdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d9f + +bfdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d9f + 
+bfdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xdf,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fdf + +bfdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xdf,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fdf + +bfdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e1d + +bfdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e1d + +bfdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00011001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1419 + +bfdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00011001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1419 + +bfdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01011000 +// CHECK-INST: bfdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545658 + +bfdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01011000 +// CHECK-INST: bfdot 
za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545658 + +bfdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x98,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521998 + +bfdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x98,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521998 + +bfdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00011001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5819 + +bfdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00011001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5819 + +bfdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1add + +bfdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1add + +bfdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00011010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x75,0x51,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c151751a + +bfdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00011010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151751a + +bfdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10011111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b399f + +bfdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10011111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b399f + + +bfdot za.s[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-10100000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x10,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01010 + +bfdot za.s[w8, 0], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-10100000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x10,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01010 + +bfdot za.s[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-10110100-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x55,0x51,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45155 + +bfdot za.s[w10, 5], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-10110100-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x55,0x51,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45155 + +bfdot 
za.s[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-10101000-01110001-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x97,0x71,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87197 + +bfdot za.s[w11, 7], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-10101000-01110001-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x97,0x71,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87197 + +bfdot za.s[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-10111110-01110011-11010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd7,0x73,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be73d7 + +bfdot za.s[w11, 7], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-10111110-01110011-11010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd7,0x73,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be73d7 + +bfdot za.s[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-10110000-00010010-00010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x15,0x12,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01215 + +bfdot za.s[w8, 5], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-10110000-00010010-00010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x15,0x12,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01215 + +bfdot za.s[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-10111110-00010000-00010001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x10,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1011 + +bfdot 
za.s[w8, 1], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-10111110-00010000-00010001 +// CHECK-INST: bfdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x10,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1011 + +bfdot za.s[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-10110100-01010010-01010000 +// CHECK-INST: bfdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x52,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45250 + +bfdot za.s[w10, 0], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-10110100-01010010-01010000 +// CHECK-INST: bfdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x52,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45250 + +bfdot za.s[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-10100010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x11,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21190 + +bfdot za.s[w8, 0], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-10100010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x11,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21190 + +bfdot za.s[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-10111010-01010000-00010001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x50,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5011 + +bfdot za.s[w10, 1], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-10111010-01010000-00010001 +// CHECK-INST: bfdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x50,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5011 + +bfdot za.s[w8, 5, vgx2], 
{z22.h, z23.h}, {z30.h, z31.h} // 11000001-10111110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd5,0x12,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be12d5 + +bfdot za.s[w8, 5], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-10111110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd5,0x12,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be12d5 + +bfdot za.s[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-10100000-01110001-00010010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x12,0x71,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07112 + +bfdot za.s[w11, 2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-10100000-01110001-00010010 +// CHECK-INST: bfdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x12,0x71,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07112 + +bfdot za.s[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-10101010-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x97,0x31,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3197 + +bfdot za.s[w9, 7], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-10101010-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x97,0x31,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3197 + + +bfdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-00110000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x10,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301010 + +bfdot za.s[w8, 0], {z0.h - z3.h}, z0.h // 
11000001-00110000-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x10,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301010 + +bfdot za.s[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-00110101-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x55,0x51,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355155 + +bfdot za.s[w10, 5], {z10.h - z13.h}, z5.h // 11000001-00110101-01010001-01010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x55,0x51,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355155 + +bfdot za.s[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-00111000-01110001-10110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb7,0x71,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13871b7 + +bfdot za.s[w11, 7], {z13.h - z16.h}, z8.h // 11000001-00111000-01110001-10110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb7,0x71,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13871b7 + +bfdot za.s[w11, 7, vgx4], {z31.h - z2.h}, z15.h // 11000001-00111111-01110011-11110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf7,0x73,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f73f7 + +bfdot za.s[w11, 7], {z31.h - z2.h}, z15.h // 11000001-00111111-01110011-11110111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf7,0x73,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f73f7 + +bfdot za.s[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-00110000-00010010-00110101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// 
CHECK-ENCODING: [0x35,0x12,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301235 + +bfdot za.s[w8, 5], {z17.h - z20.h}, z0.h // 11000001-00110000-00010010-00110101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x35,0x12,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301235 + +bfdot za.s[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-00111110-00010000-00110001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x10,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1031 + +bfdot za.s[w8, 1], {z1.h - z4.h}, z14.h // 11000001-00111110-00010000-00110001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x10,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1031 + +bfdot za.s[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-00110100-01010010-01110000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x52,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345270 + +bfdot za.s[w10, 0], {z19.h - z22.h}, z4.h // 11000001-00110100-01010010-01110000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x52,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345270 + +bfdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-00110010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x11,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321190 + +bfdot za.s[w8, 0], {z12.h - z15.h}, z2.h // 11000001-00110010-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x11,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321190 + +bfdot za.s[w10, 1, vgx4], {z1.h - 
z4.h}, z10.h // 11000001-00111010-01010000-00110001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x50,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5031 + +bfdot za.s[w10, 1], {z1.h - z4.h}, z10.h // 11000001-00111010-01010000-00110001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x50,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5031 + +bfdot za.s[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-00111110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd5,0x12,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e12d5 + +bfdot za.s[w8, 5], {z22.h - z25.h}, z14.h // 11000001-00111110-00010010-11010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd5,0x12,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e12d5 + +bfdot za.s[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-00110001-01110001-00110010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x32,0x71,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317132 + +bfdot za.s[w11, 2], {z9.h - z12.h}, z1.h // 11000001-00110001-01110001-00110010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x32,0x71,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317132 + +bfdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-00111011-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x97,0x31,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3197 + +bfdot za.s[w9, 7], {z12.h - z15.h}, z11.h // 11000001-00111011-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: 
[0x97,0x31,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3197 + + +bfdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509018 + +bfdot za.s[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509018 + +bfdot za.s[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00011101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d51d + +bfdot za.s[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00011101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d51d + +bfdot za.s[w11, 7, vgx4], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd9f + +bfdot za.s[w11, 7], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd9f + +bfdot za.s[w11, 7, vgx4], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x9f,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff9f + 
+bfdot za.s[w11, 7], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10011111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x9f,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff9f + +bfdot za.s[w8, 5, vgx4], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e1d + +bfdot za.s[w8, 5], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e1d + +bfdot za.s[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00011001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9419 + +bfdot za.s[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00011001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9419 + +bfdot za.s[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00011000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d618 + +bfdot za.s[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00011000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d618 + +bfdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10011000 +// 
CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x98,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529998 + +bfdot za.s[w8, 0], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10011000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x98,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529998 + +bfdot za.s[w10, 1, vgx4], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00011001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad819 + +bfdot za.s[w10, 1], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00011001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad819 + +bfdot za.s[w8, 5, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a9d + +bfdot za.s[w8, 5], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10011101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a9d + +bfdot za.s[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00011010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f51a + +bfdot za.s[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00011010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: 
[0x1a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f51a + +bfdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10011111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb99f + +bfdot za.s[w9, 7], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10011111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb99f + + +bfdot za.s[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-10100001-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x10,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11010 + +bfdot za.s[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-10100001-00010000-00010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x10,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11010 + +bfdot za.s[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-10110101-01010001-00010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x15,0x51,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55115 + +bfdot za.s[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-10110101-01010001-00010101 +// CHECK-INST: bfdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x15,0x51,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55115 + +bfdot za.s[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-01110001-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: 
[0x97,0x71,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97197 + +bfdot za.s[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-01110001-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x71,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97197 + +bfdot za.s[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-10111101-01110011-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x97,0x73,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7397 + +bfdot za.s[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-10111101-01110011-10010111 +// CHECK-INST: bfdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x97,0x73,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7397 + +bfdot za.s[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-10110001-00010010-00010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x15,0x12,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11215 + +bfdot za.s[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-10110001-00010010-00010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x15,0x12,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11215 + +bfdot za.s[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-10111101-00010000-00010001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x10,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1011 + +bfdot za.s[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-10111101-00010000-00010001 +// CHECK-INST: bfdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// 
CHECK-ENCODING: [0x11,0x10,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1011 + +bfdot za.s[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-10110101-01010010-00010000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x52,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55210 + +bfdot za.s[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-10110101-01010010-00010000 +// CHECK-INST: bfdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x52,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55210 + +bfdot za.s[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-10100001-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x11,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11190 + +bfdot za.s[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-10100001-00010001-10010000 +// CHECK-INST: bfdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x11,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11190 + +bfdot za.s[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-10111001-01010000-00010001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x50,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95011 + +bfdot za.s[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-10111001-01010000-00010001 +// CHECK-INST: bfdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x50,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95011 + +bfdot za.s[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-10111101-00010010-10010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { 
z28.h - z31.h } +// CHECK-ENCODING: [0x95,0x12,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1295 + +bfdot za.s[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-10111101-00010010-10010101 +// CHECK-INST: bfdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x95,0x12,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1295 + +bfdot za.s[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-10100001-01110001-00010010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x12,0x71,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17112 + +bfdot za.s[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-10100001-01110001-00010010 +// CHECK-INST: bfdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x12,0x71,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17112 + +bfdot za.s[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x31,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93197 + +bfdot za.s[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-00110001-10010111 +// CHECK-INST: bfdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x31,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93197 + diff --git a/llvm/test/MC/AArch64/SME2/bfvdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/bfvdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfvdot-diagnostics.s @@ -0,0 +1,64 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Out of range index offset + +bfvdot za.s[w8, 
8, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: bfvdot za.s[w8, 8, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfvdot za.s[w8, -1, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: bfvdot za.s[w8, -1, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +bfvdot za.s[w7, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: bfvdot za.s[w7, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfvdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: bfvdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector list + +bfvdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bfvdot za.s[w8, 0, vgx2], {z1.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], {z1.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Matrix Operand + +bfvdot za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: bfvdot 
za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector grouping + +bfvdot za.s[w8, 0, vgx4], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bfvdot za.s[w8, 0, vgx4], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid lane index + +bfvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/bfvdot.s b/llvm/test/MC/AArch64/SME2/bfvdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bfvdot.s @@ -0,0 +1,158 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +bfvdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00011000 +// CHECK-INST: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1500018 + +bfvdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00011000 +// CHECK-INST: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500018 + +bfvdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01011101 +// CHECK-INST: bfvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155455d + +bfvdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01011101 +// CHECK-INST: bfvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155455d + +bfvdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10011111 +// CHECK-INST: bfvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d9f + +bfvdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10011111 +// CHECK-INST: bfvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x9f,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d9f + +bfvdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11011111 +// CHECK-INST: bfvdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xdf,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fdf + +bfvdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11011111 +// CHECK-INST: bfvdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xdf,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fdf + +bfvdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 
11000001-01010000-00001110-00011101 +// CHECK-INST: bfvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e1d + +bfvdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00011101 +// CHECK-INST: bfvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x1d,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e1d + +bfvdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00011001 +// CHECK-INST: bfvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0419 + +bfvdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00011001 +// CHECK-INST: bfvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0419 + +bfvdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01011000 +// CHECK-INST: bfvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544658 + +bfvdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01011000 +// CHECK-INST: bfvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544658 + +bfvdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10011000 +// CHECK-INST: bfvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x98,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520998 + +bfvdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10011000 +// CHECK-INST: bfvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] 
+// CHECK-ENCODING: [0x98,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520998 + +bfvdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00011001 +// CHECK-INST: bfvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4819 + +bfvdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00011001 +// CHECK-INST: bfvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x19,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4819 + +bfvdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11011101 +// CHECK-INST: bfvdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0add + +bfvdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11011101 +// CHECK-INST: bfvdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0add + +bfvdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00011010 +// CHECK-INST: bfvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151651a + +bfvdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00011010 +// CHECK-INST: bfvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151651a + +bfvdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10011111 +// CHECK-INST: bfvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c15b299f + +bfvdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10011111 +// CHECK-INST: bfvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x9f,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b299f + diff --git a/llvm/test/MC/AArch64/SME2/bmopa-diagnostics.s b/llvm/test/MC/AArch64/SME2/bmopa-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bmopa-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +bmopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bmopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +bmopa za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bmopa za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bmopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bmopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid suffixes + +bmopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: bmopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bmopa za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git 
a/llvm/test/MC/AArch64/SME2/bmopa.s b/llvm/test/MC/AArch64/SME2/bmopa.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bmopa.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +bmopa za0.s, p0/m, p0/m, z0.s, z0.s // 10000000-10000000-00000000-00001000 +// CHECK-INST: bmopa za0.s, p0/m, p0/m, z0.s, z0.s +// CHECK-ENCODING: [0x08,0x00,0x80,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80800008 + +bmopa za1.s, p5/m, p2/m, z10.s, z21.s // 10000000-10010101-01010101-01001001 +// CHECK-INST: bmopa za1.s, p5/m, p2/m, z10.s, z21.s +// CHECK-ENCODING: [0x49,0x55,0x95,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80955549 + +bmopa za3.s, p3/m, p7/m, z13.s, z8.s // 10000000-10001000-11101101-10101011 +// CHECK-INST: bmopa za3.s, p3/m, p7/m, z13.s, z8.s +// CHECK-ENCODING: [0xab,0xed,0x88,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 8088edab + +bmopa za3.s, p7/m, p7/m, z31.s, z31.s // 10000000-10011111-11111111-11101011 +// CHECK-INST: bmopa za3.s, p7/m, p7/m, z31.s, z31.s +// CHECK-ENCODING: [0xeb,0xff,0x9f,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809fffeb + +bmopa za1.s, p3/m, p0/m, z17.s, 
z16.s // 10000000-10010000-00001110-00101001 +// CHECK-INST: bmopa za1.s, p3/m, p0/m, z17.s, z16.s +// CHECK-ENCODING: [0x29,0x0e,0x90,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80900e29 + +bmopa za1.s, p1/m, p4/m, z1.s, z30.s // 10000000-10011110-10000100-00101001 +// CHECK-INST: bmopa za1.s, p1/m, p4/m, z1.s, z30.s +// CHECK-ENCODING: [0x29,0x84,0x9e,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809e8429 + +bmopa za0.s, p5/m, p2/m, z19.s, z20.s // 10000000-10010100-01010110-01101000 +// CHECK-INST: bmopa za0.s, p5/m, p2/m, z19.s, z20.s +// CHECK-ENCODING: [0x68,0x56,0x94,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80945668 + +bmopa za0.s, p6/m, p0/m, z12.s, z2.s // 10000000-10000010-00011001-10001000 +// CHECK-INST: bmopa za0.s, p6/m, p0/m, z12.s, z2.s +// CHECK-ENCODING: [0x88,0x19,0x82,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80821988 + +bmopa za1.s, p2/m, p6/m, z1.s, z26.s // 10000000-10011010-11001000-00101001 +// CHECK-INST: bmopa za1.s, p2/m, p6/m, z1.s, z26.s +// CHECK-ENCODING: [0x29,0xc8,0x9a,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809ac829 + +bmopa za1.s, p2/m, p0/m, z22.s, z30.s // 10000000-10011110-00001010-11001001 +// CHECK-INST: bmopa za1.s, p2/m, p0/m, z22.s, z30.s +// CHECK-ENCODING: [0xc9,0x0a,0x9e,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809e0ac9 + +bmopa za2.s, p5/m, p7/m, z9.s, z1.s // 10000000-10000001-11110101-00101010 +// CHECK-INST: bmopa za2.s, p5/m, p7/m, z9.s, z1.s +// CHECK-ENCODING: [0x2a,0xf5,0x81,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 8081f52a + +bmopa za3.s, p2/m, p5/m, z12.s, z11.s // 10000000-10001011-10101001-10001011 +// CHECK-INST: bmopa za3.s, p2/m, p5/m, z12.s, z11.s +// CHECK-ENCODING: [0x8b,0xa9,0x8b,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 808ba98b + diff --git a/llvm/test/MC/AArch64/SME2/bmops-diagnostics.s 
b/llvm/test/MC/AArch64/SME2/bmops-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bmops-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +bmops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bmops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +bmops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: bmops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bmops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: bmops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid suffixes + +bmops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: bmops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +bmops za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/bmops.s b/llvm/test/MC/AArch64/SME2/bmops.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/bmops.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: 
| FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +bmops za0.s, p0/m, p0/m, z0.s, z0.s // 10000000-10000000-00000000-00011000 +// CHECK-INST: bmops za0.s, p0/m, p0/m, z0.s, z0.s +// CHECK-ENCODING: [0x18,0x00,0x80,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80800018 + +bmops za1.s, p5/m, p2/m, z10.s, z21.s // 10000000-10010101-01010101-01011001 +// CHECK-INST: bmops za1.s, p5/m, p2/m, z10.s, z21.s +// CHECK-ENCODING: [0x59,0x55,0x95,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80955559 + +bmops za3.s, p3/m, p7/m, z13.s, z8.s // 10000000-10001000-11101101-10111011 +// CHECK-INST: bmops za3.s, p3/m, p7/m, z13.s, z8.s +// CHECK-ENCODING: [0xbb,0xed,0x88,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 8088edbb + +bmops za3.s, p7/m, p7/m, z31.s, z31.s // 10000000-10011111-11111111-11111011 +// CHECK-INST: bmops za3.s, p7/m, p7/m, z31.s, z31.s +// CHECK-ENCODING: [0xfb,0xff,0x9f,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809ffffb + +bmops za1.s, p3/m, p0/m, z17.s, z16.s // 10000000-10010000-00001110-00111001 +// CHECK-INST: bmops za1.s, p3/m, p0/m, z17.s, z16.s +// CHECK-ENCODING: [0x39,0x0e,0x90,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80900e39 + +bmops za1.s, p1/m, p4/m, z1.s, z30.s // 10000000-10011110-10000100-00111001 +// CHECK-INST: bmops za1.s, p1/m, p4/m, z1.s, z30.s +// CHECK-ENCODING: 
[0x39,0x84,0x9e,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809e8439 + +bmops za0.s, p5/m, p2/m, z19.s, z20.s // 10000000-10010100-01010110-01111000 +// CHECK-INST: bmops za0.s, p5/m, p2/m, z19.s, z20.s +// CHECK-ENCODING: [0x78,0x56,0x94,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80945678 + +bmops za0.s, p6/m, p0/m, z12.s, z2.s // 10000000-10000010-00011001-10011000 +// CHECK-INST: bmops za0.s, p6/m, p0/m, z12.s, z2.s +// CHECK-ENCODING: [0x98,0x19,0x82,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 80821998 + +bmops za1.s, p2/m, p6/m, z1.s, z26.s // 10000000-10011010-11001000-00111001 +// CHECK-INST: bmops za1.s, p2/m, p6/m, z1.s, z26.s +// CHECK-ENCODING: [0x39,0xc8,0x9a,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809ac839 + +bmops za1.s, p2/m, p0/m, z22.s, z30.s // 10000000-10011110-00001010-11011001 +// CHECK-INST: bmops za1.s, p2/m, p0/m, z22.s, z30.s +// CHECK-ENCODING: [0xd9,0x0a,0x9e,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 809e0ad9 + +bmops za2.s, p5/m, p7/m, z9.s, z1.s // 10000000-10000001-11110101-00111010 +// CHECK-INST: bmops za2.s, p5/m, p7/m, z9.s, z1.s +// CHECK-ENCODING: [0x3a,0xf5,0x81,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 8081f53a + +bmops za3.s, p2/m, p5/m, z12.s, z11.s // 10000000-10001011-10101001-10011011 +// CHECK-INST: bmops za3.s, p2/m, p5/m, z12.s, z11.s +// CHECK-ENCODING: [0x9b,0xa9,0x8b,0x80] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 808ba99b + diff --git a/llvm/test/MC/AArch64/SME2/fclamp-diagnostics.s b/llvm/test/MC/AArch64/SME2/fclamp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fclamp-diagnostics.s @@ -0,0 +1,38 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fclamp 
{z0.h-z2.h}, z0.h, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fclamp {z0.h-z2.h}, z0.h, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fclamp {z0.d-z4.d}, z5.d, z6.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: fclamp {z0.d-z4.d}, z5.d, z6.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fclamp {z23.s-z24.s}, z13.s, z8.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fclamp {z23.s-z24.s}, z13.s, z8.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fclamp {z21.h-z24.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element type +// CHECK-NEXT: fclamp {z21.h-z24.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fclamp {z0.h-z1.h}, z0.h, z4.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fclamp {z0.h-z1.h}, z0.h, z4.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fclamp {z0.s-z3.s}, z5.d, z6.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: fclamp {z0.s-z3.s}, z5.d, z6.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fclamp.s b/llvm/test/MC/AArch64/SME2/fclamp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fclamp.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d 
--mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fclamp {z0.d, z1.d}, z0.d, z0.d // 11000001-11100000-11000000-00000000 +// CHECK-INST: fclamp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xc0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0c000 + +fclamp {z20.d, z21.d}, z10.d, z21.d // 11000001-11110101-11000001-01010100 +// CHECK-INST: fclamp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xc1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5c154 + +fclamp {z22.d, z23.d}, z13.d, z8.d // 11000001-11101000-11000001-10110110 +// CHECK-INST: fclamp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xc1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8c1b6 + +fclamp {z30.d, z31.d}, z31.d, z31.d // 11000001-11111111-11000011-11111110 +// CHECK-INST: fclamp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xc3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffc3fe + + +fclamp {z0.h, z1.h}, z0.h, z0.h // 11000001-01100000-11000000-00000000 +// CHECK-INST: fclamp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xc0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160c000 + +fclamp {z20.h, z21.h}, z10.h, z21.h // 11000001-01110101-11000001-01010100 +// CHECK-INST: fclamp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xc1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175c154 + +fclamp {z22.h, z23.h}, z13.h, z8.h // 
11000001-01101000-11000001-10110110 +// CHECK-INST: fclamp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb6,0xc1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168c1b6 + +fclamp {z30.h, z31.h}, z31.h, z31.h // 11000001-01111111-11000011-11111110 +// CHECK-INST: fclamp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfe,0xc3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fc3fe + + +fclamp {z0.s, z1.s}, z0.s, z0.s // 11000001-10100000-11000000-00000000 +// CHECK-INST: fclamp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xc0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0c000 + +fclamp {z20.s, z21.s}, z10.s, z21.s // 11000001-10110101-11000001-01010100 +// CHECK-INST: fclamp { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xc1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5c154 + +fclamp {z22.s, z23.s}, z13.s, z8.s // 11000001-10101000-11000001-10110110 +// CHECK-INST: fclamp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb6,0xc1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8c1b6 + +fclamp {z30.s, z31.s}, z31.s, z31.s // 11000001-10111111-11000011-11111110 +// CHECK-INST: fclamp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfe,0xc3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfc3fe + + +fclamp {z0.d - z3.d}, z0.d, z0.d // 11000001-11100000-11001000-00000000 +// CHECK-INST: fclamp { z0.d - z3.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xc8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0c800 + +fclamp {z20.d - z23.d}, z10.d, z21.d // 11000001-11110101-11001001-01010100 +// CHECK-INST: fclamp { z20.d - z23.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xc9,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5c954 + +fclamp {z20.d - z23.d}, z13.d, z8.d // 11000001-11101000-11001001-10110100 +// 
CHECK-INST: fclamp { z20.d - z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb4,0xc9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8c9b4 + +fclamp {z28.d - z31.d}, z31.d, z31.d // 11000001-11111111-11001011-11111100 +// CHECK-INST: fclamp { z28.d - z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfc,0xcb,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffcbfc + + +fclamp {z0.h - z3.h}, z0.h, z0.h // 11000001-01100000-11001000-00000000 +// CHECK-INST: fclamp { z0.h - z3.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xc8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160c800 + +fclamp {z20.h - z23.h}, z10.h, z21.h // 11000001-01110101-11001001-01010100 +// CHECK-INST: fclamp { z20.h - z23.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xc9,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175c954 + +fclamp {z20.h - z23.h}, z13.h, z8.h // 11000001-01101000-11001001-10110100 +// CHECK-INST: fclamp { z20.h - z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb4,0xc9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168c9b4 + +fclamp {z28.h - z31.h}, z31.h, z31.h // 11000001-01111111-11001011-11111100 +// CHECK-INST: fclamp { z28.h - z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfc,0xcb,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fcbfc + + +fclamp {z0.s - z3.s}, z0.s, z0.s // 11000001-10100000-11001000-00000000 +// CHECK-INST: fclamp { z0.s - z3.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xc8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0c800 + +fclamp {z20.s - z23.s}, z10.s, z21.s // 11000001-10110101-11001001-01010100 +// CHECK-INST: fclamp { z20.s - z23.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xc9,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5c954 + +fclamp {z20.s - z23.s}, z13.s, z8.s // 11000001-10101000-11001001-10110100 +// CHECK-INST: fclamp { z20.s - 
z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb4,0xc9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8c9b4 + +fclamp {z28.s - z31.s}, z31.s, z31.s // 11000001-10111111-11001011-11111100 +// CHECK-INST: fclamp { z28.s - z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfc,0xcb,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfcbfc + diff --git a/llvm/test/MC/AArch64/SME2/fcvtzs-diagnostics.s b/llvm/test/MC/AArch64/SME2/fcvtzs-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fcvtzs-diagnostics.s @@ -0,0 +1,23 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fcvtzs {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtzs {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtzs {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: fcvtzs {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fcvtzs {z0.d-z1.d}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtzs {z0.d-z1.d}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/fcvtzs.s b/llvm/test/MC/AArch64/SME2/fcvtzs.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fcvtzs.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | 
FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fcvtzs {z0.s, z1.s}, {z0.s, z1.s} // 11000001-00100001-11100000-00000000 +// CHECK-INST: fcvtzs { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e000 + +fcvtzs {z20.s, z21.s}, {z10.s, z11.s} // 11000001-00100001-11100001-01010100 +// CHECK-INST: fcvtzs { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e154 + +fcvtzs {z22.s, z23.s}, {z12.s, z13.s} // 11000001-00100001-11100001-10010110 +// CHECK-INST: fcvtzs { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e196 + +fcvtzs {z30.s, z31.s}, {z30.s, z31.s} // 11000001-00100001-11100011-11011110 +// CHECK-INST: fcvtzs { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e3de + + +fcvtzs {z0.s - z3.s}, {z0.s - z3.s} // 11000001-00110001-11100000-00000000 +// CHECK-INST: fcvtzs { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e000 + +fcvtzs {z20.s - z23.s}, {z8.s - z11.s} // 11000001-00110001-11100001-00010100 +// CHECK-INST: fcvtzs { z20.s - z23.s }, { z8.s - z11.s } +// 
CHECK-ENCODING: [0x14,0xe1,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e114 + +fcvtzs {z20.s - z23.s}, {z12.s - z15.s} // 11000001-00110001-11100001-10010100 +// CHECK-INST: fcvtzs { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e194 + +fcvtzs {z28.s - z31.s}, {z28.s - z31.s} // 11000001-00110001-11100011-10011100 +// CHECK-INST: fcvtzs { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e39c + diff --git a/llvm/test/MC/AArch64/SME2/fcvtzu-diagnostics.s b/llvm/test/MC/AArch64/SME2/fcvtzu-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fcvtzu-diagnostics.s @@ -0,0 +1,23 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fcvtzu {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtzu {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fcvtzu {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: fcvtzu {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fcvtzu {z0.d-z1.d}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fcvtzu {z0.d-z1.d}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/fcvtzu.s b/llvm/test/MC/AArch64/SME2/fcvtzu.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2/fcvtzu.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fcvtzu {z0.s, z1.s}, {z0.s, z1.s} // 11000001-00100001-11100000-00100000 +// CHECK-INST: fcvtzu { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x20,0xe0,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e020 + +fcvtzu {z20.s, z21.s}, {z10.s, z11.s} // 11000001-00100001-11100001-01110100 +// CHECK-INST: fcvtzu { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x74,0xe1,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e174 + +fcvtzu {z22.s, z23.s}, {z12.s, z13.s} // 11000001-00100001-11100001-10110110 +// CHECK-INST: fcvtzu { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0xb6,0xe1,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e1b6 + +fcvtzu {z30.s, z31.s}, {z30.s, z31.s} // 11000001-00100001-11100011-11111110 +// CHECK-INST: fcvtzu { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xfe,0xe3,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121e3fe + + +fcvtzu {z0.s - z3.s}, {z0.s - z3.s} // 11000001-00110001-11100000-00100000 +// CHECK-INST: fcvtzu { z0.s - z3.s }, { z0.s - z3.s } 
+// CHECK-ENCODING: [0x20,0xe0,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e020 + +fcvtzu {z20.s - z23.s}, {z8.s - z11.s} // 11000001-00110001-11100001-00110100 +// CHECK-INST: fcvtzu { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x34,0xe1,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e134 + +fcvtzu {z20.s - z23.s}, {z12.s - z15.s} // 11000001-00110001-11100001-10110100 +// CHECK-INST: fcvtzu { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0xb4,0xe1,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e1b4 + +fcvtzu {z28.s - z31.s}, {z28.s - z31.s} // 11000001-00110001-11100011-10111100 +// CHECK-INST: fcvtzu { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0xbc,0xe3,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131e3bc + diff --git a/llvm/test/MC/AArch64/SME2/fdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/fdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fdot-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid select register + +fdot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: fdot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot za.s[w12, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: fdot za.s[w12, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid select offset + +fdot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 
7]. +// CHECK-NEXT: fdot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot za.s[w8, -1], {z0.h-z3.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: fdot za.s[w8, -1], {z0.h-z3.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range element index + +fdot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: fdot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot za.s[w8, 0], {z0.h-z3.h}, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: fdot za.s[w8, 0], {z0.h-z3.h}, z0.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// ZPR range constraint + +fdot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fdot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fdot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// fdot (multi-single) + +fdot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fdot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fdot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fdot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fdot.s 
b/llvm/test/MC/AArch64/SME2/fdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fdot.s @@ -0,0 +1,883 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-00100000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x10,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201000 + +fdot za.s[w8, 0], {z0.h, z1.h}, z0.h // 11000001-00100000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x10,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201000 + +fdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-00100101-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x45,0x51,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255145 + +fdot za.s[w10, 5], {z10.h, z11.h}, z5.h // 11000001-00100101-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x45,0x51,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255145 + +fdot za.s[w11, 7, 
vgx2], {z13.h, z14.h}, z8.h // 11000001-00101000-01110001-10100111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa7,0x71,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12871a7 + +fdot za.s[w11, 7], {z13.h, z14.h}, z8.h // 11000001-00101000-01110001-10100111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa7,0x71,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12871a7 + +fdot za.s[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-00101111-01110011-11100111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe7,0x73,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f73e7 + +fdot za.s[w11, 7], {z31.h, z0.h}, z15.h // 11000001-00101111-01110011-11100111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe7,0x73,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f73e7 + +fdot za.s[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-00100000-00010010-00100101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x25,0x12,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201225 + +fdot za.s[w8, 5], {z17.h, z18.h}, z0.h // 11000001-00100000-00010010-00100101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x25,0x12,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201225 + +fdot za.s[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-00101110-00010000-00100001 +// CHECK-INST: fdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x10,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1021 + +fdot za.s[w8, 1], {z1.h, z2.h}, z14.h // 11000001-00101110-00010000-00100001 +// CHECK-INST: fdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x10,0x2e,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1021 + +fdot za.s[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-00100100-01010010-01100000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x52,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245260 + +fdot za.s[w10, 0], {z19.h, z20.h}, z4.h // 11000001-00100100-01010010-01100000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x52,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245260 + +fdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-00100010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x11,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221180 + +fdot za.s[w8, 0], {z12.h, z13.h}, z2.h // 11000001-00100010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x11,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221180 + +fdot za.s[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-00101010-01010000-00100001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x50,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5021 + +fdot za.s[w10, 1], {z1.h, z2.h}, z10.h // 11000001-00101010-01010000-00100001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x50,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5021 + +fdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-00101110-00010010-11000101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc5,0x12,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e12c5 + +fdot za.s[w8, 5], {z22.h, z23.h}, z14.h // 11000001-00101110-00010010-11000101 +// 
CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc5,0x12,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e12c5 + +fdot za.s[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-00100001-01110001-00100010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x22,0x71,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217122 + +fdot za.s[w11, 2], {z9.h, z10.h}, z1.h // 11000001-00100001-01110001-00100010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x22,0x71,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217122 + +fdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-00101011-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x87,0x31,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3187 + +fdot za.s[w9, 7], {z12.h, z13.h}, z11.h // 11000001-00101011-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x87,0x31,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3187 + + +fdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00001000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501008 + +fdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00001000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501008 + +fdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01001101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x4d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c155554d + +fdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01001101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x4d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155554d + +fdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d8f + +fdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d8f + +fdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11001111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xcf,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fcf + +fdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11001111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xcf,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fcf + +fdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00001101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e0d + +fdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00001101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e0d + +fdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00001001 +// 
CHECK-INST: fdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1409 + +fdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00001001 +// CHECK-INST: fdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1409 + +fdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01001000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545648 + +fdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01001000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545648 + +fdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10001000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521988 + +fdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10001000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521988 + +fdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00001001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5809 + +fdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00001001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c15a5809 + +fdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11001101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1acd + +fdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11001101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1acd + +fdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00001010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151750a + +fdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00001010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151750a + +fdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10001111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b398f + +fdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10001111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b398f + + +fdot za.s[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-10100000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x10,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01000 + +fdot za.s[w8, 0], {z0.h, z1.h}, {z0.h, z1.h} // 
11000001-10100000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x10,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01000 + +fdot za.s[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-10110100-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x45,0x51,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45145 + +fdot za.s[w10, 5], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-10110100-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x45,0x51,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45145 + +fdot za.s[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-10101000-01110001-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x87,0x71,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87187 + +fdot za.s[w11, 7], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-10101000-01110001-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x87,0x71,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87187 + +fdot za.s[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-10111110-01110011-11000111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x73,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be73c7 + +fdot za.s[w11, 7], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-10111110-01110011-11000111 +// CHECK-INST: fdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x73,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be73c7 + +fdot za.s[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 
11000001-10110000-00010010-00000101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x12,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01205 + +fdot za.s[w8, 5], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-10110000-00010010-00000101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x12,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01205 + +fdot za.s[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-10111110-00010000-00000001 +// CHECK-INST: fdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x10,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1001 + +fdot za.s[w8, 1], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-10111110-00010000-00000001 +// CHECK-INST: fdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x10,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1001 + +fdot za.s[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-10110100-01010010-01000000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x52,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45240 + +fdot za.s[w10, 0], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-10110100-01010010-01000000 +// CHECK-INST: fdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x52,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45240 + +fdot za.s[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-10100010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x11,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21180 + +fdot za.s[w8, 0], {z12.h, z13.h}, {z2.h, z3.h} // 
11000001-10100010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x11,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21180 + +fdot za.s[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-10111010-01010000-00000001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x50,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5001 + +fdot za.s[w10, 1], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-10111010-01010000-00000001 +// CHECK-INST: fdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x50,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5001 + +fdot za.s[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-10111110-00010010-11000101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc5,0x12,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be12c5 + +fdot za.s[w8, 5], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-10111110-00010010-11000101 +// CHECK-INST: fdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc5,0x12,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be12c5 + +fdot za.s[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-10100000-01110001-00000010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x02,0x71,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07102 + +fdot za.s[w11, 2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-10100000-01110001-00000010 +// CHECK-INST: fdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x02,0x71,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07102 + +fdot za.s[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 
11000001-10101010-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x87,0x31,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3187 + +fdot za.s[w9, 7], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-10101010-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x87,0x31,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3187 + + +fdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-00110000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x10,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301000 + +fdot za.s[w8, 0], {z0.h - z3.h}, z0.h // 11000001-00110000-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x10,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301000 + +fdot za.s[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-00110101-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x45,0x51,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355145 + +fdot za.s[w10, 5], {z10.h - z13.h}, z5.h // 11000001-00110101-01010001-01000101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x45,0x51,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355145 + +fdot za.s[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-00111000-01110001-10100111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa7,0x71,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13871a7 + +fdot za.s[w11, 7], {z13.h - z16.h}, z8.h // 11000001-00111000-01110001-10100111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: 
[0xa7,0x71,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13871a7 + +fdot za.s[w11, 7, vgx4], {z31.h - z2.h}, z15.h // 11000001-00111111-01110011-11100111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe7,0x73,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f73e7 + +fdot za.s[w11, 7], {z31.h - z2.h}, z15.h // 11000001-00111111-01110011-11100111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe7,0x73,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f73e7 + +fdot za.s[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-00110000-00010010-00100101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x25,0x12,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301225 + +fdot za.s[w8, 5], {z17.h - z20.h}, z0.h // 11000001-00110000-00010010-00100101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x25,0x12,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301225 + +fdot za.s[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-00111110-00010000-00100001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x10,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1021 + +fdot za.s[w8, 1], {z1.h - z4.h}, z14.h // 11000001-00111110-00010000-00100001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x10,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1021 + +fdot za.s[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-00110100-01010010-01100000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x52,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345260 + +fdot za.s[w10, 0], {z19.h - 
z22.h}, z4.h // 11000001-00110100-01010010-01100000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x52,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345260 + +fdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-00110010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x11,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321180 + +fdot za.s[w8, 0], {z12.h - z15.h}, z2.h // 11000001-00110010-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x11,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321180 + +fdot za.s[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-00111010-01010000-00100001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x50,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5021 + +fdot za.s[w10, 1], {z1.h - z4.h}, z10.h // 11000001-00111010-01010000-00100001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x50,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5021 + +fdot za.s[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-00111110-00010010-11000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc5,0x12,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e12c5 + +fdot za.s[w8, 5], {z22.h - z25.h}, z14.h // 11000001-00111110-00010010-11000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc5,0x12,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e12c5 + +fdot za.s[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-00110001-01110001-00100010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: 
[0x22,0x71,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317122 + +fdot za.s[w11, 2], {z9.h - z12.h}, z1.h // 11000001-00110001-01110001-00100010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x22,0x71,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317122 + +fdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-00111011-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x87,0x31,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3187 + +fdot za.s[w9, 7], {z12.h - z15.h}, z11.h // 11000001-00111011-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x87,0x31,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3187 + + +fdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00001000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509008 + +fdot za.s[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00001000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509008 + +fdot za.s[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00001101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d50d + +fdot za.s[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00001101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d50d + +fdot za.s[w11, 7, vgx4], {z12.h - z15.h}, 
z8.h[3] // 11000001-01011000-11111101-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd8f + +fdot za.s[w11, 7], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd8f + +fdot za.s[w11, 7, vgx4], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x8f,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff8f + +fdot za.s[w11, 7], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10001111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x8f,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff8f + +fdot za.s[w8, 5, vgx4], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00001101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e0d + +fdot za.s[w8, 5], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00001101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e0d + +fdot za.s[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00001001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9409 + +fdot za.s[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00001001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, 
z14.h[1] +// CHECK-ENCODING: [0x09,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9409 + +fdot za.s[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00001000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d608 + +fdot za.s[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00001000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d608 + +fdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10001000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529988 + +fdot za.s[w8, 0], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10001000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529988 + +fdot za.s[w10, 1, vgx4], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00001001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad809 + +fdot za.s[w10, 1], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00001001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad809 + +fdot za.s[w8, 5, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10001101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c15e9a8d + +fdot za.s[w8, 5], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10001101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a8d + +fdot za.s[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00001010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f50a + +fdot za.s[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00001010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f50a + +fdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10001111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb98f + +fdot za.s[w9, 7], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10001111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb98f + + +fdot za.s[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-10100001-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x10,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11000 + +fdot za.s[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-10100001-00010000-00000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x10,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11000 + +fdot za.s[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 
11000001-10110101-01010001-00000101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x05,0x51,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55105 + +fdot za.s[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-10110101-01010001-00000101 +// CHECK-INST: fdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x05,0x51,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55105 + +fdot za.s[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-01110001-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x71,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97187 + +fdot za.s[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-01110001-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x71,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97187 + +fdot za.s[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-10111101-01110011-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x87,0x73,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7387 + +fdot za.s[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-10111101-01110011-10000111 +// CHECK-INST: fdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x87,0x73,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7387 + +fdot za.s[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-10110001-00010010-00000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x05,0x12,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11205 + +fdot za.s[w8, 5], {z16.h - z19.h}, {z16.h - 
z19.h} // 11000001-10110001-00010010-00000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x05,0x12,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11205 + +fdot za.s[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-10111101-00010000-00000001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x10,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1001 + +fdot za.s[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-10111101-00010000-00000001 +// CHECK-INST: fdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x10,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1001 + +fdot za.s[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-10110101-01010010-00000000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x52,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55200 + +fdot za.s[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-10110101-01010010-00000000 +// CHECK-INST: fdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x52,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55200 + +fdot za.s[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-10100001-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x11,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11180 + +fdot za.s[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-10100001-00010001-10000000 +// CHECK-INST: fdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x11,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11180 + +fdot za.s[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - 
z27.h} // 11000001-10111001-01010000-00000001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x50,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95001 + +fdot za.s[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-10111001-01010000-00000001 +// CHECK-INST: fdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x50,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95001 + +fdot za.s[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-10111101-00010010-10000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x85,0x12,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1285 + +fdot za.s[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-10111101-00010010-10000101 +// CHECK-INST: fdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x85,0x12,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1285 + +fdot za.s[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-10100001-01110001-00000010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0x71,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17102 + +fdot za.s[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-10100001-01110001-00000010 +// CHECK-INST: fdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0x71,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17102 + +fdot za.s[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-10101001-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x31,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93187 + +fdot za.s[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} 
// 11000001-10101001-00110001-10000111 +// CHECK-INST: fdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x31,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93187 + diff --git a/llvm/test/MC/AArch64/SME2/fmax-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmax-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmax-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fmax {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmax {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmax {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fmax {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +fmax {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmax {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fmax {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmax {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fmax.s b/llvm/test/MC/AArch64/SME2/fmax.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmax.s 
@@ -0,0 +1,313 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fmax {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100001-00000000 +// CHECK-INST: fmax { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x00,0xa1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a100 + +fmax {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100001-00010100 +// CHECK-INST: fmax { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x14,0xa1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a114 + +fmax {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100001-00010110 +// CHECK-INST: fmax { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x16,0xa1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a116 + +fmax {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100001-00011110 +// CHECK-INST: fmax { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x1e,0xa1,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa11e + + +fmax {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110001-00000000 +// CHECK-INST: fmax { z0.d, z1.d }, { z0.d, 
z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0xb1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b100 + +fmax {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110001-00010100 +// CHECK-INST: fmax { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x14,0xb1,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b114 + +fmax {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110001-00010110 +// CHECK-INST: fmax { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x16,0xb1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b116 + +fmax {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110001-00011110 +// CHECK-INST: fmax { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x1e,0xb1,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb11e + + +fmax {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100001-00000000 +// CHECK-INST: fmax { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0xa1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a100 + +fmax {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100001-00010100 +// CHECK-INST: fmax { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x14,0xa1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a114 + +fmax {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100001-00010110 +// CHECK-INST: fmax { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x16,0xa1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a116 + +fmax {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100001-00011110 +// CHECK-INST: fmax { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x1e,0xa1,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c16fa11e + + +fmax {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110001-00000000 +// CHECK-INST: fmax { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xb1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b100 + +fmax {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110001-00010100 +// CHECK-INST: fmax { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x14,0xb1,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b114 + +fmax {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110001-00010110 +// CHECK-INST: fmax { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x16,0xb1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b116 + +fmax {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110001-00011110 +// CHECK-INST: fmax { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x1e,0xb1,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb11e + + +fmax {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100001-00000000 +// CHECK-INST: fmax { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x00,0xa1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a100 + +fmax {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100001-00010100 +// CHECK-INST: fmax { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x14,0xa1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a114 + +fmax {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100001-00010110 +// CHECK-INST: fmax { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x16,0xa1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a116 + +fmax {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100001-00011110 
+// CHECK-INST: fmax { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x1e,0xa1,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa11e + + +fmax {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110001-00000000 +// CHECK-INST: fmax { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xb1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b100 + +fmax {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110001-00010100 +// CHECK-INST: fmax { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x14,0xb1,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b114 + +fmax {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110001-00010110 +// CHECK-INST: fmax { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x16,0xb1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b116 + +fmax {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110001-00011110 +// CHECK-INST: fmax { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x1e,0xb1,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb11e + + +fmax {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101001-00000000 +// CHECK-INST: fmax { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x00,0xa9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a900 + +fmax {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101001-00010100 +// CHECK-INST: fmax { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x14,0xa9,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a914 + +fmax {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101001-00010100 +// CHECK-INST: fmax { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: 
[0x14,0xa9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a914 + +fmax {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101001-00011100 +// CHECK-INST: fmax { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x1c,0xa9,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa91c + + +fmax {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111001-00000000 +// CHECK-INST: fmax { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xb9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b900 + +fmax {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111001-00010100 +// CHECK-INST: fmax { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x14,0xb9,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b914 + +fmax {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111001-00010100 +// CHECK-INST: fmax { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xb9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b914 + +fmax {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111001-00011100 +// CHECK-INST: fmax { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x1c,0xb9,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb91c + + +fmax {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101001-00000000 +// CHECK-INST: fmax { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0xa9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a900 + +fmax {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101001-00010100 +// CHECK-INST: fmax { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x14,0xa9,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c165a914 + +fmax {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101001-00010100 +// CHECK-INST: fmax { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x14,0xa9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a914 + +fmax {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101001-00011100 +// CHECK-INST: fmax { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x1c,0xa9,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa91c + + +fmax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111001-00000000 +// CHECK-INST: fmax { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xb9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b900 + +fmax {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111001-00010100 +// CHECK-INST: fmax { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0xb9,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b914 + +fmax {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111001-00010100 +// CHECK-INST: fmax { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xb9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b914 + +fmax {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111001-00011100 +// CHECK-INST: fmax { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x1c,0xb9,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb91c + + +fmax {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101001-00000000 +// CHECK-INST: fmax { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x00,0xa9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a900 + +fmax {z20.s - z23.s}, {z20.s - z23.s}, z5.s 
// 11000001-10100101-10101001-00010100 +// CHECK-INST: fmax { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x14,0xa9,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a914 + +fmax {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101001-00010100 +// CHECK-INST: fmax { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x14,0xa9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a914 + +fmax {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101001-00011100 +// CHECK-INST: fmax { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x1c,0xa9,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa91c + + +fmax {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111001-00000000 +// CHECK-INST: fmax { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xb9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b900 + +fmax {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111001-00010100 +// CHECK-INST: fmax { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x14,0xb9,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b914 + +fmax {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111001-00010100 +// CHECK-INST: fmax { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xb9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b914 + +fmax {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111001-00011100 +// CHECK-INST: fmax { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x1c,0xb9,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb91c + diff --git a/llvm/test/MC/AArch64/SME2/fmaxnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmaxnm-diagnostics.s 
new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmaxnm-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fmaxnm {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmaxnm {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmaxnm {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fmaxnm {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +fmaxnm {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmaxnm {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fmaxnm {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmaxnm {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fmaxnm.s b/llvm/test/MC/AArch64/SME2/fmaxnm.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmaxnm.s @@ -0,0 +1,313 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fmaxnm {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100001-00100000 +// CHECK-INST: fmaxnm { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x20,0xa1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a120 + +fmaxnm {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100001-00110100 +// CHECK-INST: fmaxnm { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x34,0xa1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a134 + +fmaxnm {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100001-00110110 +// CHECK-INST: fmaxnm { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x36,0xa1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a136 + +fmaxnm {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100001-00111110 +// CHECK-INST: fmaxnm { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x3e,0xa1,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa13e + + +fmaxnm {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110001-00100000 +// CHECK-INST: fmaxnm { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x20,0xb1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b120 + +fmaxnm {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110001-00110100 +// CHECK-INST: fmaxnm { z20.d, z21.d }, { z20.d, 
z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x34,0xb1,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b134 + +fmaxnm {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110001-00110110 +// CHECK-INST: fmaxnm { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x36,0xb1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b136 + +fmaxnm {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110001-00111110 +// CHECK-INST: fmaxnm { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3e,0xb1,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb13e + + +fmaxnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100001-00100000 +// CHECK-INST: fmaxnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x20,0xa1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a120 + +fmaxnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100001-00110100 +// CHECK-INST: fmaxnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x34,0xa1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a134 + +fmaxnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100001-00110110 +// CHECK-INST: fmaxnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x36,0xa1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a136 + +fmaxnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100001-00111110 +// CHECK-INST: fmaxnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3e,0xa1,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa13e + + +fmaxnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110001-00100000 +// CHECK-INST: fmaxnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x20,0xb1,0x60,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c160b120 + +fmaxnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110001-00110100 +// CHECK-INST: fmaxnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x34,0xb1,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b134 + +fmaxnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110001-00110110 +// CHECK-INST: fmaxnm { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x36,0xb1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b136 + +fmaxnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110001-00111110 +// CHECK-INST: fmaxnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3e,0xb1,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb13e + + +fmaxnm {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100001-00100000 +// CHECK-INST: fmaxnm { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x20,0xa1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a120 + +fmaxnm {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100001-00110100 +// CHECK-INST: fmaxnm { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x34,0xa1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a134 + +fmaxnm {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100001-00110110 +// CHECK-INST: fmaxnm { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x36,0xa1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a136 + +fmaxnm {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100001-00111110 +// CHECK-INST: fmaxnm { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3e,0xa1,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa13e + + +fmaxnm {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, 
z1.s} // 11000001-10100000-10110001-00100000 +// CHECK-INST: fmaxnm { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x20,0xb1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b120 + +fmaxnm {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110001-00110100 +// CHECK-INST: fmaxnm { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x34,0xb1,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b134 + +fmaxnm {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110001-00110110 +// CHECK-INST: fmaxnm { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x36,0xb1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b136 + +fmaxnm {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110001-00111110 +// CHECK-INST: fmaxnm { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3e,0xb1,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb13e + + +fmaxnm {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101001-00100000 +// CHECK-INST: fmaxnm { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x20,0xa9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a920 + +fmaxnm {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101001-00110100 +// CHECK-INST: fmaxnm { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x34,0xa9,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a934 + +fmaxnm {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101001-00110100 +// CHECK-INST: fmaxnm { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x34,0xa9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a934 + +fmaxnm {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101001-00111100 +// CHECK-INST: fmaxnm { 
z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3c,0xa9,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa93c + + +fmaxnm {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111001-00100000 +// CHECK-INST: fmaxnm { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x20,0xb9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b920 + +fmaxnm {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111001-00110100 +// CHECK-INST: fmaxnm { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x34,0xb9,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b934 + +fmaxnm {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111001-00110100 +// CHECK-INST: fmaxnm { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x34,0xb9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b934 + +fmaxnm {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111001-00111100 +// CHECK-INST: fmaxnm { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3c,0xb9,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb93c + + +fmaxnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101001-00100000 +// CHECK-INST: fmaxnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x20,0xa9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a920 + +fmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101001-00110100 +// CHECK-INST: fmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x34,0xa9,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a934 + +fmaxnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101001-00110100 +// CHECK-INST: fmaxnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// 
CHECK-ENCODING: [0x34,0xa9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a934 + +fmaxnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101001-00111100 +// CHECK-INST: fmaxnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3c,0xa9,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa93c + + +fmaxnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111001-00100000 +// CHECK-INST: fmaxnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x20,0xb9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b920 + +fmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111001-00110100 +// CHECK-INST: fmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x34,0xb9,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b934 + +fmaxnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111001-00110100 +// CHECK-INST: fmaxnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x34,0xb9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b934 + +fmaxnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111001-00111100 +// CHECK-INST: fmaxnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x3c,0xb9,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb93c + + +fmaxnm {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101001-00100000 +// CHECK-INST: fmaxnm { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x20,0xa9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a920 + +fmaxnm {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101001-00110100 +// CHECK-INST: fmaxnm { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x34,0xa9,0xa5,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a934 + +fmaxnm {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101001-00110100 +// CHECK-INST: fmaxnm { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x34,0xa9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a934 + +fmaxnm {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101001-00111100 +// CHECK-INST: fmaxnm { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3c,0xa9,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa93c + + +fmaxnm {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111001-00100000 +// CHECK-INST: fmaxnm { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x20,0xb9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b920 + +fmaxnm {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111001-00110100 +// CHECK-INST: fmaxnm { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x34,0xb9,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b934 + +fmaxnm {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111001-00110100 +// CHECK-INST: fmaxnm { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x34,0xb9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b934 + +fmaxnm {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111001-00111100 +// CHECK-INST: fmaxnm { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3c,0xb9,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb93c + diff --git a/llvm/test/MC/AArch64/SME2/fmin-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmin-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmin-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc 
-triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fmin {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fmin {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fmin {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fmin {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +fmin {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmin {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fmin {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fmin {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fmin.s b/llvm/test/MC/AArch64/SME2/fmin.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fmin.s @@ -0,0 +1,313 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fmin {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100001-00000001 +// CHECK-INST: fmin { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x01,0xa1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a101 + +fmin {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100001-00010101 +// CHECK-INST: fmin { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x15,0xa1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a115 + +fmin {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100001-00010111 +// CHECK-INST: fmin { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x17,0xa1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a117 + +fmin {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100001-00011111 +// CHECK-INST: fmin { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x1f,0xa1,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa11f + + +fmin {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110001-00000001 +// CHECK-INST: fmin { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0xb1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b101 + +fmin {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110001-00010101 +// CHECK-INST: fmin { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x15,0xb1,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b115 + +fmin {z22.d, z23.d}, {z22.d, 
z23.d}, {z8.d, z9.d} // 11000001-11101000-10110001-00010111 +// CHECK-INST: fmin { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x17,0xb1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b117 + +fmin {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110001-00011111 +// CHECK-INST: fmin { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x1f,0xb1,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb11f + + +fmin {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100001-00000001 +// CHECK-INST: fmin { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x01,0xa1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a101 + +fmin {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100001-00010101 +// CHECK-INST: fmin { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x15,0xa1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a115 + +fmin {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100001-00010111 +// CHECK-INST: fmin { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x17,0xa1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a117 + +fmin {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100001-00011111 +// CHECK-INST: fmin { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x1f,0xa1,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa11f + + +fmin {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110001-00000001 +// CHECK-INST: fmin { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xb1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b101 + +fmin {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110001-00010101 +// CHECK-INST: fmin { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, 
z21.h } +// CHECK-ENCODING: [0x15,0xb1,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b115 + +fmin {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110001-00010111 +// CHECK-INST: fmin { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x17,0xb1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b117 + +fmin {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110001-00011111 +// CHECK-INST: fmin { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x1f,0xb1,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb11f + + +fmin {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100001-00000001 +// CHECK-INST: fmin { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x01,0xa1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a101 + +fmin {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100001-00010101 +// CHECK-INST: fmin { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x15,0xa1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a115 + +fmin {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100001-00010111 +// CHECK-INST: fmin { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x17,0xa1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a117 + +fmin {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100001-00011111 +// CHECK-INST: fmin { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x1f,0xa1,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa11f + + +fmin {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110001-00000001 +// CHECK-INST: fmin { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xb1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b101 + +fmin 
{z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110001-00010101 +// CHECK-INST: fmin { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x15,0xb1,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b115 + +fmin {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110001-00010111 +// CHECK-INST: fmin { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x17,0xb1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b117 + +fmin {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110001-00011111 +// CHECK-INST: fmin { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x1f,0xb1,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb11f + + +fmin {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101001-00000001 +// CHECK-INST: fmin { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x01,0xa9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a901 + +fmin {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101001-00010101 +// CHECK-INST: fmin { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x15,0xa9,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a915 + +fmin {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101001-00010101 +// CHECK-INST: fmin { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x15,0xa9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a915 + +fmin {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101001-00011101 +// CHECK-INST: fmin { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x1d,0xa9,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa91d + + +fmin {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111001-00000001 +// CHECK-INST: fmin 
{ z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0xb9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b901 + +fmin {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111001-00010101 +// CHECK-INST: fmin { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x15,0xb9,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b915 + +fmin {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111001-00010101 +// CHECK-INST: fmin { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x15,0xb9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b915 + +fmin {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111001-00011101 +// CHECK-INST: fmin { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x1d,0xb9,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb91d + + +fmin {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101001-00000001 +// CHECK-INST: fmin { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x01,0xa9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a901 + +fmin {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101001-00010101 +// CHECK-INST: fmin { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x15,0xa9,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a915 + +fmin {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101001-00010101 +// CHECK-INST: fmin { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x15,0xa9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a915 + +fmin {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101001-00011101 +// CHECK-INST: fmin { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: 
[0x1d,0xa9,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa91d + + +fmin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111001-00000001 +// CHECK-INST: fmin { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0xb9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b901 + +fmin {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111001-00010101 +// CHECK-INST: fmin { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x15,0xb9,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b915 + +fmin {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111001-00010101 +// CHECK-INST: fmin { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x15,0xb9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b915 + +fmin {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111001-00011101 +// CHECK-INST: fmin { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x1d,0xb9,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb91d + + +fmin {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101001-00000001 +// CHECK-INST: fmin { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x01,0xa9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a901 + +fmin {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101001-00010101 +// CHECK-INST: fmin { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x15,0xa9,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a915 + +fmin {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101001-00010101 +// CHECK-INST: fmin { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x15,0xa9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1a8a915 + +fmin {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101001-00011101 +// CHECK-INST: fmin { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x1d,0xa9,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa91d + + +fmin {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111001-00000001 +// CHECK-INST: fmin { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0xb9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b901 + +fmin {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111001-00010101 +// CHECK-INST: fmin { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x15,0xb9,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b915 + +fmin {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111001-00010101 +// CHECK-INST: fmin { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x15,0xb9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b915 + +fmin {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111001-00011101 +// CHECK-INST: fmin { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x1d,0xb9,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb91d + diff --git a/llvm/test/MC/AArch64/SME2/fminnm-diagnostics.s b/llvm/test/MC/AArch64/SME2/fminnm-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fminnm-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +fminnm {z0.d, z1.d}, {z0.d-z2.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fminnm {z0.d, z1.d}, 
{z0.d-z2.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fminnm {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fminnm {z1.s-z2.s}, {z0.s, z1.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +fminnm {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fminnm {z0.h, z1.h}, {z2.h-z3.h}, z31.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +fminnm {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: fminnm {z0.h, z1.h}, {z2.h-z3.h}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fminnm.s b/llvm/test/MC/AArch64/SME2/fminnm.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fminnm.s @@ -0,0 +1,313 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fminnm {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100001-00100001 +// CHECK-INST: fminnm { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x21,0xa1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a121 + +fminnm {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100001-00110101 +// CHECK-INST: fminnm { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x35,0xa1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a135 + +fminnm {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100001-00110111 +// CHECK-INST: fminnm { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x37,0xa1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a137 + +fminnm {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100001-00111111 +// CHECK-INST: fminnm { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x3f,0xa1,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa13f + + +fminnm {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110001-00100001 +// CHECK-INST: fminnm { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x21,0xb1,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b121 + +fminnm {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110001-00110101 +// CHECK-INST: fminnm { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x35,0xb1,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b135 + +fminnm {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110001-00110111 +// CHECK-INST: fminnm { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x37,0xb1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b137 + +fminnm {z30.d, z31.d}, {z30.d, z31.d}, 
{z30.d, z31.d} // 11000001-11111110-10110001-00111111 +// CHECK-INST: fminnm { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3f,0xb1,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb13f + + +fminnm {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100001-00100001 +// CHECK-INST: fminnm { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x21,0xa1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a121 + +fminnm {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100001-00110101 +// CHECK-INST: fminnm { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x35,0xa1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a135 + +fminnm {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100001-00110111 +// CHECK-INST: fminnm { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x37,0xa1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a137 + +fminnm {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100001-00111111 +// CHECK-INST: fminnm { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3f,0xa1,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa13f + + +fminnm {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110001-00100001 +// CHECK-INST: fminnm { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x21,0xb1,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b121 + +fminnm {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110001-00110101 +// CHECK-INST: fminnm { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x35,0xb1,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b135 + +fminnm {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110001-00110111 +// CHECK-INST: fminnm { z22.h, z23.h }, { 
z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x37,0xb1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b137 + +fminnm {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110001-00111111 +// CHECK-INST: fminnm { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3f,0xb1,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb13f + + +fminnm {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100001-00100001 +// CHECK-INST: fminnm { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x21,0xa1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a121 + +fminnm {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100001-00110101 +// CHECK-INST: fminnm { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x35,0xa1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a135 + +fminnm {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100001-00110111 +// CHECK-INST: fminnm { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x37,0xa1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a137 + +fminnm {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100001-00111111 +// CHECK-INST: fminnm { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3f,0xa1,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa13f + + +fminnm {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110001-00100001 +// CHECK-INST: fminnm { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x21,0xb1,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b121 + +fminnm {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110001-00110101 +// CHECK-INST: fminnm { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x35,0xb1,0xb4,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b135 + +fminnm {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110001-00110111 +// CHECK-INST: fminnm { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x37,0xb1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b137 + +fminnm {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110001-00111111 +// CHECK-INST: fminnm { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3f,0xb1,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb13f + + +fminnm {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101001-00100001 +// CHECK-INST: fminnm { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x21,0xa9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a921 + +fminnm {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101001-00110101 +// CHECK-INST: fminnm { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x35,0xa9,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a935 + +fminnm {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101001-00110101 +// CHECK-INST: fminnm { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x35,0xa9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a935 + +fminnm {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101001-00111101 +// CHECK-INST: fminnm { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3d,0xa9,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa93d + + +fminnm {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111001-00100001 +// CHECK-INST: fminnm { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x21,0xb9,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b921 + +fminnm {z20.d - z23.d}, 
{z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111001-00110101 +// CHECK-INST: fminnm { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x35,0xb9,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b935 + +fminnm {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111001-00110101 +// CHECK-INST: fminnm { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x35,0xb9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b935 + +fminnm {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111001-00111101 +// CHECK-INST: fminnm { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3d,0xb9,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb93d + + +fminnm {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101001-00100001 +// CHECK-INST: fminnm { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x21,0xa9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a921 + +fminnm {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101001-00110101 +// CHECK-INST: fminnm { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x35,0xa9,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a935 + +fminnm {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101001-00110101 +// CHECK-INST: fminnm { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x35,0xa9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a935 + +fminnm {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101001-00111101 +// CHECK-INST: fminnm { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3d,0xa9,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa93d + + +fminnm {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 
11000001-01100000-10111001-00100001 +// CHECK-INST: fminnm { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x21,0xb9,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b921 + +fminnm {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111001-00110101 +// CHECK-INST: fminnm { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x35,0xb9,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b935 + +fminnm {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111001-00110101 +// CHECK-INST: fminnm { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x35,0xb9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b935 + +fminnm {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111001-00111101 +// CHECK-INST: fminnm { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x3d,0xb9,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb93d + + +fminnm {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101001-00100001 +// CHECK-INST: fminnm { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x21,0xa9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a921 + +fminnm {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101001-00110101 +// CHECK-INST: fminnm { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x35,0xa9,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a935 + +fminnm {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101001-00110101 +// CHECK-INST: fminnm { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x35,0xa9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a935 + +fminnm {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101001-00111101 +// CHECK-INST: 
fminnm { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3d,0xa9,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa93d + + +fminnm {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111001-00100001 +// CHECK-INST: fminnm { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x21,0xb9,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b921 + +fminnm {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111001-00110101 +// CHECK-INST: fminnm { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x35,0xb9,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b935 + +fminnm {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111001-00110101 +// CHECK-INST: fminnm { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x35,0xb9,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b935 + +fminnm {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111001-00111101 +// CHECK-INST: fminnm { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3d,0xb9,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb93d + diff --git a/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s --- a/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/fmla-diagnostics.s @@ -66,7 +66,7 @@ // Invalid vector list. 
fmla za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: fmla za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fmla.s b/llvm/test/MC/AArch64/SME2/fmla.s --- a/llvm/test/MC/AArch64/SME2/fmla.s +++ b/llvm/test/MC/AArch64/SME2/fmla.s @@ -157,6 +157,151 @@ // CHECK-UNKNOWN: c16b3987 +fmla za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d[0] // 11000001-11010000-00000000-00000000 +// CHECK-INST: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d[0] +// CHECK-ENCODING: [0x00,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00000 + +fmla za.d[w8, 0], {z0.d, z1.d}, z0.d[0] // 11000001-11010000-00000000-00000000 +// CHECK-INST: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d[0] +// CHECK-ENCODING: [0x00,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00000 + +fmla za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d[1] // 11000001-11010101-01000101-01000101 +// CHECK-INST: fmla za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d[1] +// CHECK-ENCODING: [0x45,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d54545 + +fmla za.d[w10, 5], {z10.d, z11.d}, z5.d[1] // 11000001-11010101-01000101-01000101 +// CHECK-INST: fmla za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d[1] +// CHECK-ENCODING: [0x45,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d54545 + +fmla za.d[w11, 7, vgx2], {z12.d, z13.d}, z8.d[1] // 11000001-11011000-01100101-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx2], { z12.d, z13.d }, z8.d[1] +// CHECK-ENCODING: [0x87,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d86587 + +fmla za.d[w11, 7], {z12.d, z13.d}, z8.d[1] // 11000001-11011000-01100101-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx2], { z12.d, z13.d }, z8.d[1] +// CHECK-ENCODING: 
[0x87,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d86587 + +fmla za.d[w11, 7, vgx2], {z30.d, z31.d}, z15.d[1] // 11000001-11011111-01100111-11000111 +// CHECK-INST: fmla za.d[w11, 7, vgx2], { z30.d, z31.d }, z15.d[1] +// CHECK-ENCODING: [0xc7,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67c7 + +fmla za.d[w11, 7], {z30.d, z31.d}, z15.d[1] // 11000001-11011111-01100111-11000111 +// CHECK-INST: fmla za.d[w11, 7, vgx2], { z30.d, z31.d }, z15.d[1] +// CHECK-ENCODING: [0xc7,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67c7 + +fmla za.d[w8, 5, vgx2], {z16.d, z17.d}, z0.d[1] // 11000001-11010000-00000110-00000101 +// CHECK-INST: fmla za.d[w8, 5, vgx2], { z16.d, z17.d }, z0.d[1] +// CHECK-ENCODING: [0x05,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00605 + +fmla za.d[w8, 5], {z16.d, z17.d}, z0.d[1] // 11000001-11010000-00000110-00000101 +// CHECK-INST: fmla za.d[w8, 5, vgx2], { z16.d, z17.d }, z0.d[1] +// CHECK-ENCODING: [0x05,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00605 + +fmla za.d[w8, 1, vgx2], {z0.d, z1.d}, z14.d[1] // 11000001-11011110-00000100-00000001 +// CHECK-INST: fmla za.d[w8, 1, vgx2], { z0.d, z1.d }, z14.d[1] +// CHECK-ENCODING: [0x01,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0401 + +fmla za.d[w8, 1], {z0.d, z1.d}, z14.d[1] // 11000001-11011110-00000100-00000001 +// CHECK-INST: fmla za.d[w8, 1, vgx2], { z0.d, z1.d }, z14.d[1] +// CHECK-ENCODING: [0x01,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0401 + +fmla za.d[w10, 0, vgx2], {z18.d, z19.d}, z4.d[1] // 11000001-11010100-01000110-01000000 +// CHECK-INST: fmla za.d[w10, 0, vgx2], { z18.d, z19.d }, z4.d[1] +// CHECK-ENCODING: [0x40,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44640 + +fmla za.d[w10, 0], 
{z18.d, z19.d}, z4.d[1] // 11000001-11010100-01000110-01000000 +// CHECK-INST: fmla za.d[w10, 0, vgx2], { z18.d, z19.d }, z4.d[1] +// CHECK-ENCODING: [0x40,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44640 + +fmla za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d[0] // 11000001-11010010-00000001-10000000 +// CHECK-INST: fmla za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d[0] +// CHECK-ENCODING: [0x80,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20180 + +fmla za.d[w8, 0], {z12.d, z13.d}, z2.d[0] // 11000001-11010010-00000001-10000000 +// CHECK-INST: fmla za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d[0] +// CHECK-ENCODING: [0x80,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20180 + +fmla za.d[w10, 1, vgx2], {z0.d, z1.d}, z10.d[0] // 11000001-11011010-01000000-00000001 +// CHECK-INST: fmla za.d[w10, 1, vgx2], { z0.d, z1.d }, z10.d[0] +// CHECK-ENCODING: [0x01,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4001 + +fmla za.d[w10, 1], {z0.d, z1.d}, z10.d[0] // 11000001-11011010-01000000-00000001 +// CHECK-INST: fmla za.d[w10, 1, vgx2], { z0.d, z1.d }, z10.d[0] +// CHECK-ENCODING: [0x01,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4001 + +fmla za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d[0] // 11000001-11011110-00000010-11000101 +// CHECK-INST: fmla za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d[0] +// CHECK-ENCODING: [0xc5,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02c5 + +fmla za.d[w8, 5], {z22.d, z23.d}, z14.d[0] // 11000001-11011110-00000010-11000101 +// CHECK-INST: fmla za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d[0] +// CHECK-ENCODING: [0xc5,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02c5 + +fmla za.d[w11, 2, vgx2], {z8.d, z9.d}, z1.d[1] // 11000001-11010001-01100101-00000010 +// CHECK-INST: fmla za.d[w11, 2, vgx2], { z8.d, z9.d }, 
z1.d[1] +// CHECK-ENCODING: [0x02,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d16502 + +fmla za.d[w11, 2], {z8.d, z9.d}, z1.d[1] // 11000001-11010001-01100101-00000010 +// CHECK-INST: fmla za.d[w11, 2, vgx2], { z8.d, z9.d }, z1.d[1] +// CHECK-ENCODING: [0x02,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d16502 + +fmla za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d[0] // 11000001-11011011-00100001-10000111 +// CHECK-INST: fmla za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d[0] +// CHECK-ENCODING: [0x87,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db2187 + +fmla za.d[w9, 7], {z12.d, z13.d}, z11.d[0] // 11000001-11011011-00100001-10000111 +// CHECK-INST: fmla za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d[0] +// CHECK-ENCODING: [0x87,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db2187 + + fmla za.d[w8, 0, vgx2], {z0.d, z1.d}, {z0.d, z1.d} // 11000001, 11100000, 00011000, 00000000 // CHECK-INST: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z0.d, z1.d } // CHECK-ENCODING: [0x00,0x18,0xe0,0xc1] @@ -447,6 +592,151 @@ // CHECK-UNKNOWN: c12b3987 +fmla za.s[w8, 0, vgx2], {z0.s, z1.s}, z0.s[0] // 11000001-01010000-00000000-00000000 +// CHECK-INST: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s[0] +// CHECK-ENCODING: [0x00,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500000 + +fmla za.s[w8, 0], {z0.s, z1.s}, z0.s[0] // 11000001-01010000-00000000-00000000 +// CHECK-INST: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s[0] +// CHECK-ENCODING: [0x00,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500000 + +fmla za.s[w10, 5, vgx2], {z10.s, z11.s}, z5.s[1] // 11000001-01010101-01000101-01000101 +// CHECK-INST: fmla za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s[1] +// CHECK-ENCODING: [0x45,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554545 + +fmla za.s[w10, 5], 
{z10.s, z11.s}, z5.s[1] // 11000001-01010101-01000101-01000101 +// CHECK-INST: fmla za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s[1] +// CHECK-ENCODING: [0x45,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554545 + +fmla za.s[w11, 7, vgx2], {z12.s, z13.s}, z8.s[3] // 11000001-01011000-01101101-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx2], { z12.s, z13.s }, z8.s[3] +// CHECK-ENCODING: [0x87,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d87 + +fmla za.s[w11, 7], {z12.s, z13.s}, z8.s[3] // 11000001-01011000-01101101-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx2], { z12.s, z13.s }, z8.s[3] +// CHECK-ENCODING: [0x87,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d87 + +fmla za.s[w11, 7, vgx2], {z30.s, z31.s}, z15.s[3] // 11000001-01011111-01101111-11000111 +// CHECK-INST: fmla za.s[w11, 7, vgx2], { z30.s, z31.s }, z15.s[3] +// CHECK-ENCODING: [0xc7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fc7 + +fmla za.s[w11, 7], {z30.s, z31.s}, z15.s[3] // 11000001-01011111-01101111-11000111 +// CHECK-INST: fmla za.s[w11, 7, vgx2], { z30.s, z31.s }, z15.s[3] +// CHECK-ENCODING: [0xc7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fc7 + +fmla za.s[w8, 5, vgx2], {z16.s, z17.s}, z0.s[3] // 11000001-01010000-00001110-00000101 +// CHECK-INST: fmla za.s[w8, 5, vgx2], { z16.s, z17.s }, z0.s[3] +// CHECK-ENCODING: [0x05,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e05 + +fmla za.s[w8, 5], {z16.s, z17.s}, z0.s[3] // 11000001-01010000-00001110-00000101 +// CHECK-INST: fmla za.s[w8, 5, vgx2], { z16.s, z17.s }, z0.s[3] +// CHECK-ENCODING: [0x05,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e05 + +fmla za.s[w8, 1, vgx2], {z0.s, z1.s}, z14.s[1] // 11000001-01011110-00000100-00000001 +// CHECK-INST: fmla za.s[w8, 1, vgx2], { z0.s, z1.s 
}, z14.s[1] +// CHECK-ENCODING: [0x01,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0401 + +fmla za.s[w8, 1], {z0.s, z1.s}, z14.s[1] // 11000001-01011110-00000100-00000001 +// CHECK-INST: fmla za.s[w8, 1, vgx2], { z0.s, z1.s }, z14.s[1] +// CHECK-ENCODING: [0x01,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0401 + +fmla za.s[w10, 0, vgx2], {z18.s, z19.s}, z4.s[1] // 11000001-01010100-01000110-01000000 +// CHECK-INST: fmla za.s[w10, 0, vgx2], { z18.s, z19.s }, z4.s[1] +// CHECK-ENCODING: [0x40,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544640 + +fmla za.s[w10, 0], {z18.s, z19.s}, z4.s[1] // 11000001-01010100-01000110-01000000 +// CHECK-INST: fmla za.s[w10, 0, vgx2], { z18.s, z19.s }, z4.s[1] +// CHECK-ENCODING: [0x40,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544640 + +fmla za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s[2] // 11000001-01010010-00001001-10000000 +// CHECK-INST: fmla za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s[2] +// CHECK-ENCODING: [0x80,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520980 + +fmla za.s[w8, 0], {z12.s, z13.s}, z2.s[2] // 11000001-01010010-00001001-10000000 +// CHECK-INST: fmla za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s[2] +// CHECK-ENCODING: [0x80,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520980 + +fmla za.s[w10, 1, vgx2], {z0.s, z1.s}, z10.s[2] // 11000001-01011010-01001000-00000001 +// CHECK-INST: fmla za.s[w10, 1, vgx2], { z0.s, z1.s }, z10.s[2] +// CHECK-ENCODING: [0x01,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4801 + +fmla za.s[w10, 1], {z0.s, z1.s}, z10.s[2] // 11000001-01011010-01001000-00000001 +// CHECK-INST: fmla za.s[w10, 1, vgx2], { z0.s, z1.s }, z10.s[2] +// CHECK-ENCODING: [0x01,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4801 + 
+fmla za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s[2] // 11000001-01011110-00001010-11000101 +// CHECK-INST: fmla za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s[2] +// CHECK-ENCODING: [0xc5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ac5 + +fmla za.s[w8, 5], {z22.s, z23.s}, z14.s[2] // 11000001-01011110-00001010-11000101 +// CHECK-INST: fmla za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s[2] +// CHECK-ENCODING: [0xc5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ac5 + +fmla za.s[w11, 2, vgx2], {z8.s, z9.s}, z1.s[1] // 11000001-01010001-01100101-00000010 +// CHECK-INST: fmla za.s[w11, 2, vgx2], { z8.s, z9.s }, z1.s[1] +// CHECK-ENCODING: [0x02,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516502 + +fmla za.s[w11, 2], {z8.s, z9.s}, z1.s[1] // 11000001-01010001-01100101-00000010 +// CHECK-INST: fmla za.s[w11, 2, vgx2], { z8.s, z9.s }, z1.s[1] +// CHECK-ENCODING: [0x02,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516502 + +fmla za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s[2] // 11000001-01011011-00101001-10000111 +// CHECK-INST: fmla za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s[2] +// CHECK-ENCODING: [0x87,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b2987 + +fmla za.s[w9, 7], {z12.s, z13.s}, z11.s[2] // 11000001-01011011-00101001-10000111 +// CHECK-INST: fmla za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s[2] +// CHECK-ENCODING: [0x87,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b2987 + + fmla za.s[w8, 0, vgx2], {z0.s, z1.s}, {z0.s, z1.s} // 11000001, 10100000, 00011000, 00000000 // CHECK-INST: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z0.s, z1.s } // CHECK-ENCODING: [0x00,0x18,0xa0,0xc1] @@ -737,6 +1027,151 @@ // CHECK-UNKNOWN: c17b3987 +fmla za.d[w8, 0, vgx4], {z0.d - z3.d}, z0.d[0] // 11000001-11010000-10000000-00000000 +// CHECK-INST: fmla za.d[w8, 0, vgx4], { z0.d - 
z3.d }, z0.d[0] +// CHECK-ENCODING: [0x00,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08000 + +fmla za.d[w8, 0], {z0.d - z3.d}, z0.d[0] // 11000001-11010000-10000000-00000000 +// CHECK-INST: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d[0] +// CHECK-ENCODING: [0x00,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08000 + +fmla za.d[w10, 5, vgx4], {z8.d - z11.d}, z5.d[1] // 11000001-11010101-11000101-00000101 +// CHECK-INST: fmla za.d[w10, 5, vgx4], { z8.d - z11.d }, z5.d[1] +// CHECK-ENCODING: [0x05,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c505 + +fmla za.d[w10, 5], {z8.d - z11.d}, z5.d[1] // 11000001-11010101-11000101-00000101 +// CHECK-INST: fmla za.d[w10, 5, vgx4], { z8.d - z11.d }, z5.d[1] +// CHECK-ENCODING: [0x05,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c505 + +fmla za.d[w11, 7, vgx4], {z12.d - z15.d}, z8.d[1] // 11000001-11011000-11100101-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx4], { z12.d - z15.d }, z8.d[1] +// CHECK-ENCODING: [0x87,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e587 + +fmla za.d[w11, 7], {z12.d - z15.d}, z8.d[1] // 11000001-11011000-11100101-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx4], { z12.d - z15.d }, z8.d[1] +// CHECK-ENCODING: [0x87,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e587 + +fmla za.d[w11, 7, vgx4], {z28.d - z31.d}, z15.d[1] // 11000001-11011111-11100111-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx4], { z28.d - z31.d }, z15.d[1] +// CHECK-ENCODING: [0x87,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe787 + +fmla za.d[w11, 7], {z28.d - z31.d}, z15.d[1] // 11000001-11011111-11100111-10000111 +// CHECK-INST: fmla za.d[w11, 7, vgx4], { z28.d - z31.d }, z15.d[1] +// CHECK-ENCODING: [0x87,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1dfe787 + +fmla za.d[w8, 5, vgx4], {z16.d - z19.d}, z0.d[1] // 11000001-11010000-10000110-00000101 +// CHECK-INST: fmla za.d[w8, 5, vgx4], { z16.d - z19.d }, z0.d[1] +// CHECK-ENCODING: [0x05,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08605 + +fmla za.d[w8, 5], {z16.d - z19.d}, z0.d[1] // 11000001-11010000-10000110-00000101 +// CHECK-INST: fmla za.d[w8, 5, vgx4], { z16.d - z19.d }, z0.d[1] +// CHECK-ENCODING: [0x05,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08605 + +fmla za.d[w8, 1, vgx4], {z0.d - z3.d}, z14.d[1] // 11000001-11011110-10000100-00000001 +// CHECK-INST: fmla za.d[w8, 1, vgx4], { z0.d - z3.d }, z14.d[1] +// CHECK-ENCODING: [0x01,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8401 + +fmla za.d[w8, 1], {z0.d - z3.d}, z14.d[1] // 11000001-11011110-10000100-00000001 +// CHECK-INST: fmla za.d[w8, 1, vgx4], { z0.d - z3.d }, z14.d[1] +// CHECK-ENCODING: [0x01,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8401 + +fmla za.d[w10, 0, vgx4], {z16.d - z19.d}, z4.d[1] // 11000001-11010100-11000110-00000000 +// CHECK-INST: fmla za.d[w10, 0, vgx4], { z16.d - z19.d }, z4.d[1] +// CHECK-ENCODING: [0x00,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c600 + +fmla za.d[w10, 0], {z16.d - z19.d}, z4.d[1] // 11000001-11010100-11000110-00000000 +// CHECK-INST: fmla za.d[w10, 0, vgx4], { z16.d - z19.d }, z4.d[1] +// CHECK-ENCODING: [0x00,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c600 + +fmla za.d[w8, 0, vgx4], {z12.d - z15.d}, z2.d[0] // 11000001-11010010-10000001-10000000 +// CHECK-INST: fmla za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d[0] +// CHECK-ENCODING: [0x80,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28180 + +fmla za.d[w8, 0], {z12.d - z15.d}, z2.d[0] // 11000001-11010010-10000001-10000000 +// 
CHECK-INST: fmla za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d[0] +// CHECK-ENCODING: [0x80,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28180 + +fmla za.d[w10, 1, vgx4], {z0.d - z3.d}, z10.d[0] // 11000001-11011010-11000000-00000001 +// CHECK-INST: fmla za.d[w10, 1, vgx4], { z0.d - z3.d }, z10.d[0] +// CHECK-ENCODING: [0x01,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac001 + +fmla za.d[w10, 1], {z0.d - z3.d}, z10.d[0] // 11000001-11011010-11000000-00000001 +// CHECK-INST: fmla za.d[w10, 1, vgx4], { z0.d - z3.d }, z10.d[0] +// CHECK-ENCODING: [0x01,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac001 + +fmla za.d[w8, 5, vgx4], {z20.d - z23.d}, z14.d[0] // 11000001-11011110-10000010-10000101 +// CHECK-INST: fmla za.d[w8, 5, vgx4], { z20.d - z23.d }, z14.d[0] +// CHECK-ENCODING: [0x85,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8285 + +fmla za.d[w8, 5], {z20.d - z23.d}, z14.d[0] // 11000001-11011110-10000010-10000101 +// CHECK-INST: fmla za.d[w8, 5, vgx4], { z20.d - z23.d }, z14.d[0] +// CHECK-ENCODING: [0x85,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8285 + +fmla za.d[w11, 2, vgx4], {z8.d - z11.d}, z1.d[1] // 11000001-11010001-11100101-00000010 +// CHECK-INST: fmla za.d[w11, 2, vgx4], { z8.d - z11.d }, z1.d[1] +// CHECK-ENCODING: [0x02,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e502 + +fmla za.d[w11, 2], {z8.d - z11.d}, z1.d[1] // 11000001-11010001-11100101-00000010 +// CHECK-INST: fmla za.d[w11, 2, vgx4], { z8.d - z11.d }, z1.d[1] +// CHECK-ENCODING: [0x02,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e502 + +fmla za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d[0] // 11000001-11011011-10100001-10000111 +// CHECK-INST: fmla za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d[0] +// CHECK-ENCODING: [0x87,0xa1,0xdb,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba187 + +fmla za.d[w9, 7], {z12.d - z15.d}, z11.d[0] // 11000001-11011011-10100001-10000111 +// CHECK-INST: fmla za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d[0] +// CHECK-ENCODING: [0x87,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba187 + + fmla za.d[w8, 0, vgx4], {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100001-00011000-00000000 // CHECK-INST: fmla za.d[w8, 0, vgx4], { z0.d - z3.d }, { z0.d - z3.d } // CHECK-ENCODING: [0x00,0x18,0xe1,0xc1] @@ -1027,6 +1462,150 @@ // CHECK-UNKNOWN: c13b3987 +fmla za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s[0] // 11000001-01010000-10000000-00000000 +// CHECK-INST: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s[0] +// CHECK-ENCODING: [0x00,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508000 + +fmla za.s[w8, 0], {z0.s - z3.s}, z0.s[0] // 11000001-01010000-10000000-00000000 +// CHECK-INST: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s[0] +// CHECK-ENCODING: [0x00,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508000 + +fmla za.s[w10, 5, vgx4], {z8.s - z11.s}, z5.s[1] // 11000001-01010101-11000101-00000101 +// CHECK-INST: fmla za.s[w10, 5, vgx4], { z8.s - z11.s }, z5.s[1] +// CHECK-ENCODING: [0x05,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c505 + +fmla za.s[w10, 5], {z8.s - z11.s}, z5.s[1] // 11000001-01010101-11000101-00000101 +// CHECK-INST: fmla za.s[w10, 5, vgx4], { z8.s - z11.s }, z5.s[1] +// CHECK-ENCODING: [0x05,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c505 + +fmla za.s[w11, 7, vgx4], {z12.s - z15.s}, z8.s[3] // 11000001-01011000-11101101-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx4], { z12.s - z15.s }, z8.s[3] +// CHECK-ENCODING: [0x87,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158ed87 + +fmla za.s[w11, 7], {z12.s - z15.s}, z8.s[3] // 
11000001-01011000-11101101-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx4], { z12.s - z15.s }, z8.s[3] +// CHECK-ENCODING: [0x87,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158ed87 + +fmla za.s[w11, 7, vgx4], {z28.s - z31.s}, z15.s[3] // 11000001-01011111-11101111-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx4], { z28.s - z31.s }, z15.s[3] +// CHECK-ENCODING: [0x87,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fef87 + +fmla za.s[w11, 7], {z28.s - z31.s}, z15.s[3] // 11000001-01011111-11101111-10000111 +// CHECK-INST: fmla za.s[w11, 7, vgx4], { z28.s - z31.s }, z15.s[3] +// CHECK-ENCODING: [0x87,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fef87 + +fmla za.s[w8, 5, vgx4], {z16.s - z19.s}, z0.s[3] // 11000001-01010000-10001110-00000101 +// CHECK-INST: fmla za.s[w8, 5, vgx4], { z16.s - z19.s }, z0.s[3] +// CHECK-ENCODING: [0x05,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e05 + +fmla za.s[w8, 5], {z16.s - z19.s}, z0.s[3] // 11000001-01010000-10001110-00000101 +// CHECK-INST: fmla za.s[w8, 5, vgx4], { z16.s - z19.s }, z0.s[3] +// CHECK-ENCODING: [0x05,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e05 + +fmla za.s[w8, 1, vgx4], {z0.s - z3.s}, z14.s[1] // 11000001-01011110-10000100-00000001 +// CHECK-INST: fmla za.s[w8, 1, vgx4], { z0.s - z3.s }, z14.s[1] +// CHECK-ENCODING: [0x01,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8401 + +fmla za.s[w8, 1], {z0.s - z3.s}, z14.s[1] // 11000001-01011110-10000100-00000001 +// CHECK-INST: fmla za.s[w8, 1, vgx4], { z0.s - z3.s }, z14.s[1] +// CHECK-ENCODING: [0x01,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8401 + +fmla za.s[w10, 0, vgx4], {z16.s - z19.s}, z4.s[1] // 11000001-01010100-11000110-00000000 +// CHECK-INST: fmla za.s[w10, 0, vgx4], { z16.s - z19.s }, z4.s[1] +// 
CHECK-ENCODING: [0x00,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c600 + +fmla za.s[w10, 0], {z16.s - z19.s}, z4.s[1] // 11000001-01010100-11000110-00000000 +// CHECK-INST: fmla za.s[w10, 0, vgx4], { z16.s - z19.s }, z4.s[1] +// CHECK-ENCODING: [0x00,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c600 + +fmla za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s[2] // 11000001-01010010-10001001-10000000 +// CHECK-INST: fmla za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s[2] +// CHECK-ENCODING: [0x80,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1528980 + +fmla za.s[w8, 0], {z12.s - z15.s}, z2.s[2] // 11000001-01010010-10001001-10000000 +// CHECK-INST: fmla za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s[2] +// CHECK-ENCODING: [0x80,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1528980 + +fmla za.s[w10, 1, vgx4], {z0.s - z3.s}, z10.s[2] // 11000001-01011010-11001000-00000001 +// CHECK-INST: fmla za.s[w10, 1, vgx4], { z0.s - z3.s }, z10.s[2] +// CHECK-ENCODING: [0x01,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac801 + +fmla za.s[w10, 1], {z0.s - z3.s}, z10.s[2] // 11000001-01011010-11001000-00000001 +// CHECK-INST: fmla za.s[w10, 1, vgx4], { z0.s - z3.s }, z10.s[2] +// CHECK-ENCODING: [0x01,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac801 + +fmla za.s[w8, 5, vgx4], {z20.s - z23.s}, z14.s[2] // 11000001-01011110-10001010-10000101 +// CHECK-INST: fmla za.s[w8, 5, vgx4], { z20.s - z23.s }, z14.s[2] +// CHECK-ENCODING: [0x85,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8a85 + +fmla za.s[w8, 5], {z20.s - z23.s}, z14.s[2] // 11000001-01011110-10001010-10000101 +// CHECK-INST: fmla za.s[w8, 5, vgx4], { z20.s - z23.s }, z14.s[2] +// CHECK-ENCODING: [0x85,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8a85 + 
+fmla za.s[w11, 2, vgx4], {z8.s - z11.s}, z1.s[1] // 11000001-01010001-11100101-00000010 +// CHECK-INST: fmla za.s[w11, 2, vgx4], { z8.s - z11.s }, z1.s[1] +// CHECK-ENCODING: [0x02,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e502 + +fmla za.s[w11, 2], {z8.s - z11.s}, z1.s[1] // 11000001-01010001-11100101-00000010 +// CHECK-INST: fmla za.s[w11, 2, vgx4], { z8.s - z11.s }, z1.s[1] +// CHECK-ENCODING: [0x02,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e502 + +fmla za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s[2] // 11000001-01011011-10101001-10000111 +// CHECK-INST: fmla za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s[2] +// CHECK-ENCODING: [0x87,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba987 + +fmla za.s[w9, 7], {z12.s - z15.s}, z11.s[2] // 11000001-01011011-10101001-10000111 +// CHECK-INST: fmla za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s[2] +// CHECK-ENCODING: [0x87,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba987 + fmla za.s[w8, 0, vgx4], {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100001-00011000-00000000 // CHECK-INST: fmla za.s[w8, 0, vgx4], { z0.s - z3.s }, { z0.s - z3.s } diff --git a/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s b/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s --- a/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/fmls-diagnostics.s @@ -52,7 +52,7 @@ // Invalid vector list. 
fmls za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: fmls za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fmls.s b/llvm/test/MC/AArch64/SME2/fmls.s --- a/llvm/test/MC/AArch64/SME2/fmls.s +++ b/llvm/test/MC/AArch64/SME2/fmls.s @@ -157,7 +157,152 @@ // CHECK-UNKNOWN: c16b398f -fmls za.d[w8, 0, vgx2], {z0.d, z1.d}, {z0.d, z1.d} // 11000001, 11100000, 00011000, 00001000 +fmls za.d[w8, 0, vgx2], {z0.d, z1.d}, z0.d[0] // 11000001-11010000-00000000-00010000 +// CHECK-INST: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d[0] +// CHECK-ENCODING: [0x10,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00010 + +fmls za.d[w8, 0], {z0.d, z1.d}, z0.d[0] // 11000001-11010000-00000000-00010000 +// CHECK-INST: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z0.d[0] +// CHECK-ENCODING: [0x10,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00010 + +fmls za.d[w10, 5, vgx2], {z10.d, z11.d}, z5.d[1] // 11000001-11010101-01000101-01010101 +// CHECK-INST: fmls za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d[1] +// CHECK-ENCODING: [0x55,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d54555 + +fmls za.d[w10, 5], {z10.d, z11.d}, z5.d[1] // 11000001-11010101-01000101-01010101 +// CHECK-INST: fmls za.d[w10, 5, vgx2], { z10.d, z11.d }, z5.d[1] +// CHECK-ENCODING: [0x55,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d54555 + +fmls za.d[w11, 7, vgx2], {z12.d, z13.d}, z8.d[1] // 11000001-11011000-01100101-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx2], { z12.d, z13.d }, z8.d[1] +// CHECK-ENCODING: [0x97,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d86597 + +fmls za.d[w11, 7], {z12.d, z13.d}, z8.d[1] // 
11000001-11011000-01100101-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx2], { z12.d, z13.d }, z8.d[1] +// CHECK-ENCODING: [0x97,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d86597 + +fmls za.d[w11, 7, vgx2], {z30.d, z31.d}, z15.d[1] // 11000001-11011111-01100111-11010111 +// CHECK-INST: fmls za.d[w11, 7, vgx2], { z30.d, z31.d }, z15.d[1] +// CHECK-ENCODING: [0xd7,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67d7 + +fmls za.d[w11, 7], {z30.d, z31.d}, z15.d[1] // 11000001-11011111-01100111-11010111 +// CHECK-INST: fmls za.d[w11, 7, vgx2], { z30.d, z31.d }, z15.d[1] +// CHECK-ENCODING: [0xd7,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67d7 + +fmls za.d[w8, 5, vgx2], {z16.d, z17.d}, z0.d[1] // 11000001-11010000-00000110-00010101 +// CHECK-INST: fmls za.d[w8, 5, vgx2], { z16.d, z17.d }, z0.d[1] +// CHECK-ENCODING: [0x15,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00615 + +fmls za.d[w8, 5], {z16.d, z17.d}, z0.d[1] // 11000001-11010000-00000110-00010101 +// CHECK-INST: fmls za.d[w8, 5, vgx2], { z16.d, z17.d }, z0.d[1] +// CHECK-ENCODING: [0x15,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00615 + +fmls za.d[w8, 1, vgx2], {z0.d, z1.d}, z14.d[1] // 11000001-11011110-00000100-00010001 +// CHECK-INST: fmls za.d[w8, 1, vgx2], { z0.d, z1.d }, z14.d[1] +// CHECK-ENCODING: [0x11,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0411 + +fmls za.d[w8, 1], {z0.d, z1.d}, z14.d[1] // 11000001-11011110-00000100-00010001 +// CHECK-INST: fmls za.d[w8, 1, vgx2], { z0.d, z1.d }, z14.d[1] +// CHECK-ENCODING: [0x11,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0411 + +fmls za.d[w10, 0, vgx2], {z18.d, z19.d}, z4.d[1] // 11000001-11010100-01000110-01010000 +// CHECK-INST: fmls za.d[w10, 0, vgx2], { z18.d, z19.d }, z4.d[1] +// 
CHECK-ENCODING: [0x50,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44650 + +fmls za.d[w10, 0], {z18.d, z19.d}, z4.d[1] // 11000001-11010100-01000110-01010000 +// CHECK-INST: fmls za.d[w10, 0, vgx2], { z18.d, z19.d }, z4.d[1] +// CHECK-ENCODING: [0x50,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44650 + +fmls za.d[w8, 0, vgx2], {z12.d, z13.d}, z2.d[0] // 11000001-11010010-00000001-10010000 +// CHECK-INST: fmls za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d[0] +// CHECK-ENCODING: [0x90,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20190 + +fmls za.d[w8, 0], {z12.d, z13.d}, z2.d[0] // 11000001-11010010-00000001-10010000 +// CHECK-INST: fmls za.d[w8, 0, vgx2], { z12.d, z13.d }, z2.d[0] +// CHECK-ENCODING: [0x90,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20190 + +fmls za.d[w10, 1, vgx2], {z0.d, z1.d}, z10.d[0] // 11000001-11011010-01000000-00010001 +// CHECK-INST: fmls za.d[w10, 1, vgx2], { z0.d, z1.d }, z10.d[0] +// CHECK-ENCODING: [0x11,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4011 + +fmls za.d[w10, 1], {z0.d, z1.d}, z10.d[0] // 11000001-11011010-01000000-00010001 +// CHECK-INST: fmls za.d[w10, 1, vgx2], { z0.d, z1.d }, z10.d[0] +// CHECK-ENCODING: [0x11,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4011 + +fmls za.d[w8, 5, vgx2], {z22.d, z23.d}, z14.d[0] // 11000001-11011110-00000010-11010101 +// CHECK-INST: fmls za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d[0] +// CHECK-ENCODING: [0xd5,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02d5 + +fmls za.d[w8, 5], {z22.d, z23.d}, z14.d[0] // 11000001-11011110-00000010-11010101 +// CHECK-INST: fmls za.d[w8, 5, vgx2], { z22.d, z23.d }, z14.d[0] +// CHECK-ENCODING: [0xd5,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02d5 + +fmls za.d[w11, 
2, vgx2], {z8.d, z9.d}, z1.d[1] // 11000001-11010001-01100101-00010010 +// CHECK-INST: fmls za.d[w11, 2, vgx2], { z8.d, z9.d }, z1.d[1] +// CHECK-ENCODING: [0x12,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d16512 + +fmls za.d[w11, 2], {z8.d, z9.d}, z1.d[1] // 11000001-11010001-01100101-00010010 +// CHECK-INST: fmls za.d[w11, 2, vgx2], { z8.d, z9.d }, z1.d[1] +// CHECK-ENCODING: [0x12,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d16512 + +fmls za.d[w9, 7, vgx2], {z12.d, z13.d}, z11.d[0] // 11000001-11011011-00100001-10010111 +// CHECK-INST: fmls za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d[0] +// CHECK-ENCODING: [0x97,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db2197 + +fmls za.d[w9, 7], {z12.d, z13.d}, z11.d[0] // 11000001-11011011-00100001-10010111 +// CHECK-INST: fmls za.d[w9, 7, vgx2], { z12.d, z13.d }, z11.d[0] +// CHECK-ENCODING: [0x97,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db2197 + + +fmls za.d[w8, 0, vgx2], {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-00011000-00001000 // CHECK-INST: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z0.d, z1.d } // CHECK-ENCODING: [0x08,0x18,0xe0,0xc1] // CHECK-ERROR: instruction requires: sme2 @@ -447,7 +592,152 @@ // CHECK-UNKNOWN: c12b398f -fmls za.s[w8, 0, vgx2], {z0.s, z1.s}, {z0.s, z1.s} // 11000001, 10100000, 00011000, 00001000 +fmls za.s[w8, 0, vgx2], {z0.s, z1.s}, z0.s[0] // 11000001-01010000-00000000-00010000 +// CHECK-INST: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s[0] +// CHECK-ENCODING: [0x10,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500010 + +fmls za.s[w8, 0], {z0.s, z1.s}, z0.s[0] // 11000001-01010000-00000000-00010000 +// CHECK-INST: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z0.s[0] +// CHECK-ENCODING: [0x10,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500010 + +fmls za.s[w10, 5, vgx2], 
{z10.s, z11.s}, z5.s[1] // 11000001-01010101-01000101-01010101 +// CHECK-INST: fmls za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s[1] +// CHECK-ENCODING: [0x55,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554555 + +fmls za.s[w10, 5], {z10.s, z11.s}, z5.s[1] // 11000001-01010101-01000101-01010101 +// CHECK-INST: fmls za.s[w10, 5, vgx2], { z10.s, z11.s }, z5.s[1] +// CHECK-ENCODING: [0x55,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554555 + +fmls za.s[w11, 7, vgx2], {z12.s, z13.s}, z8.s[3] // 11000001-01011000-01101101-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx2], { z12.s, z13.s }, z8.s[3] +// CHECK-ENCODING: [0x97,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d97 + +fmls za.s[w11, 7], {z12.s, z13.s}, z8.s[3] // 11000001-01011000-01101101-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx2], { z12.s, z13.s }, z8.s[3] +// CHECK-ENCODING: [0x97,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d97 + +fmls za.s[w11, 7, vgx2], {z30.s, z31.s}, z15.s[3] // 11000001-01011111-01101111-11010111 +// CHECK-INST: fmls za.s[w11, 7, vgx2], { z30.s, z31.s }, z15.s[3] +// CHECK-ENCODING: [0xd7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fd7 + +fmls za.s[w11, 7], {z30.s, z31.s}, z15.s[3] // 11000001-01011111-01101111-11010111 +// CHECK-INST: fmls za.s[w11, 7, vgx2], { z30.s, z31.s }, z15.s[3] +// CHECK-ENCODING: [0xd7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fd7 + +fmls za.s[w8, 5, vgx2], {z16.s, z17.s}, z0.s[3] // 11000001-01010000-00001110-00010101 +// CHECK-INST: fmls za.s[w8, 5, vgx2], { z16.s, z17.s }, z0.s[3] +// CHECK-ENCODING: [0x15,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e15 + +fmls za.s[w8, 5], {z16.s, z17.s}, z0.s[3] // 11000001-01010000-00001110-00010101 +// CHECK-INST: fmls za.s[w8, 5, vgx2], { z16.s, z17.s 
}, z0.s[3] +// CHECK-ENCODING: [0x15,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e15 + +fmls za.s[w8, 1, vgx2], {z0.s, z1.s}, z14.s[1] // 11000001-01011110-00000100-00010001 +// CHECK-INST: fmls za.s[w8, 1, vgx2], { z0.s, z1.s }, z14.s[1] +// CHECK-ENCODING: [0x11,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0411 + +fmls za.s[w8, 1], {z0.s, z1.s}, z14.s[1] // 11000001-01011110-00000100-00010001 +// CHECK-INST: fmls za.s[w8, 1, vgx2], { z0.s, z1.s }, z14.s[1] +// CHECK-ENCODING: [0x11,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0411 + +fmls za.s[w10, 0, vgx2], {z18.s, z19.s}, z4.s[1] // 11000001-01010100-01000110-01010000 +// CHECK-INST: fmls za.s[w10, 0, vgx2], { z18.s, z19.s }, z4.s[1] +// CHECK-ENCODING: [0x50,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544650 + +fmls za.s[w10, 0], {z18.s, z19.s}, z4.s[1] // 11000001-01010100-01000110-01010000 +// CHECK-INST: fmls za.s[w10, 0, vgx2], { z18.s, z19.s }, z4.s[1] +// CHECK-ENCODING: [0x50,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544650 + +fmls za.s[w8, 0, vgx2], {z12.s, z13.s}, z2.s[2] // 11000001-01010010-00001001-10010000 +// CHECK-INST: fmls za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s[2] +// CHECK-ENCODING: [0x90,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520990 + +fmls za.s[w8, 0], {z12.s, z13.s}, z2.s[2] // 11000001-01010010-00001001-10010000 +// CHECK-INST: fmls za.s[w8, 0, vgx2], { z12.s, z13.s }, z2.s[2] +// CHECK-ENCODING: [0x90,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520990 + +fmls za.s[w10, 1, vgx2], {z0.s, z1.s}, z10.s[2] // 11000001-01011010-01001000-00010001 +// CHECK-INST: fmls za.s[w10, 1, vgx2], { z0.s, z1.s }, z10.s[2] +// CHECK-ENCODING: [0x11,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4811 + 
+fmls za.s[w10, 1], {z0.s, z1.s}, z10.s[2] // 11000001-01011010-01001000-00010001 +// CHECK-INST: fmls za.s[w10, 1, vgx2], { z0.s, z1.s }, z10.s[2] +// CHECK-ENCODING: [0x11,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4811 + +fmls za.s[w8, 5, vgx2], {z22.s, z23.s}, z14.s[2] // 11000001-01011110-00001010-11010101 +// CHECK-INST: fmls za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s[2] +// CHECK-ENCODING: [0xd5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ad5 + +fmls za.s[w8, 5], {z22.s, z23.s}, z14.s[2] // 11000001-01011110-00001010-11010101 +// CHECK-INST: fmls za.s[w8, 5, vgx2], { z22.s, z23.s }, z14.s[2] +// CHECK-ENCODING: [0xd5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ad5 + +fmls za.s[w11, 2, vgx2], {z8.s, z9.s}, z1.s[1] // 11000001-01010001-01100101-00010010 +// CHECK-INST: fmls za.s[w11, 2, vgx2], { z8.s, z9.s }, z1.s[1] +// CHECK-ENCODING: [0x12,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516512 + +fmls za.s[w11, 2], {z8.s, z9.s}, z1.s[1] // 11000001-01010001-01100101-00010010 +// CHECK-INST: fmls za.s[w11, 2, vgx2], { z8.s, z9.s }, z1.s[1] +// CHECK-ENCODING: [0x12,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516512 + +fmls za.s[w9, 7, vgx2], {z12.s, z13.s}, z11.s[2] // 11000001-01011011-00101001-10010111 +// CHECK-INST: fmls za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s[2] +// CHECK-ENCODING: [0x97,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b2997 + +fmls za.s[w9, 7], {z12.s, z13.s}, z11.s[2] // 11000001-01011011-00101001-10010111 +// CHECK-INST: fmls za.s[w9, 7, vgx2], { z12.s, z13.s }, z11.s[2] +// CHECK-ENCODING: [0x97,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b2997 + + +fmls za.s[w8, 0, vgx2], {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-00011000-00001000 // CHECK-INST: fmls za.s[w8, 0, 
vgx2], { z0.s, z1.s }, { z0.s, z1.s } // CHECK-ENCODING: [0x08,0x18,0xa0,0xc1] // CHECK-ERROR: instruction requires: sme2 @@ -737,6 +1027,151 @@ // CHECK-UNKNOWN: c17b398f +fmls za.d[w8, 0, vgx4], {z0.d - z3.d}, z0.d[0] // 11000001-11010000-10000000-00010000 +// CHECK-INST: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d[0] +// CHECK-ENCODING: [0x10,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08010 + +fmls za.d[w8, 0], {z0.d - z3.d}, z0.d[0] // 11000001-11010000-10000000-00010000 +// CHECK-INST: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, z0.d[0] +// CHECK-ENCODING: [0x10,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08010 + +fmls za.d[w10, 5, vgx4], {z8.d - z11.d}, z5.d[1] // 11000001-11010101-11000101-00010101 +// CHECK-INST: fmls za.d[w10, 5, vgx4], { z8.d - z11.d }, z5.d[1] +// CHECK-ENCODING: [0x15,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c515 + +fmls za.d[w10, 5], {z8.d - z11.d}, z5.d[1] // 11000001-11010101-11000101-00010101 +// CHECK-INST: fmls za.d[w10, 5, vgx4], { z8.d - z11.d }, z5.d[1] +// CHECK-ENCODING: [0x15,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c515 + +fmls za.d[w11, 7, vgx4], {z12.d - z15.d}, z8.d[1] // 11000001-11011000-11100101-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx4], { z12.d - z15.d }, z8.d[1] +// CHECK-ENCODING: [0x97,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e597 + +fmls za.d[w11, 7], {z12.d - z15.d}, z8.d[1] // 11000001-11011000-11100101-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx4], { z12.d - z15.d }, z8.d[1] +// CHECK-ENCODING: [0x97,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e597 + +fmls za.d[w11, 7, vgx4], {z28.d - z31.d}, z15.d[1] // 11000001-11011111-11100111-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx4], { z28.d - z31.d }, z15.d[1] +// CHECK-ENCODING: [0x97,0xe7,0xdf,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe797 + +fmls za.d[w11, 7], {z28.d - z31.d}, z15.d[1] // 11000001-11011111-11100111-10010111 +// CHECK-INST: fmls za.d[w11, 7, vgx4], { z28.d - z31.d }, z15.d[1] +// CHECK-ENCODING: [0x97,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe797 + +fmls za.d[w8, 5, vgx4], {z16.d - z19.d}, z0.d[1] // 11000001-11010000-10000110-00010101 +// CHECK-INST: fmls za.d[w8, 5, vgx4], { z16.d - z19.d }, z0.d[1] +// CHECK-ENCODING: [0x15,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08615 + +fmls za.d[w8, 5], {z16.d - z19.d}, z0.d[1] // 11000001-11010000-10000110-00010101 +// CHECK-INST: fmls za.d[w8, 5, vgx4], { z16.d - z19.d }, z0.d[1] +// CHECK-ENCODING: [0x15,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08615 + +fmls za.d[w8, 1, vgx4], {z0.d - z3.d}, z14.d[1] // 11000001-11011110-10000100-00010001 +// CHECK-INST: fmls za.d[w8, 1, vgx4], { z0.d - z3.d }, z14.d[1] +// CHECK-ENCODING: [0x11,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8411 + +fmls za.d[w8, 1], {z0.d - z3.d}, z14.d[1] // 11000001-11011110-10000100-00010001 +// CHECK-INST: fmls za.d[w8, 1, vgx4], { z0.d - z3.d }, z14.d[1] +// CHECK-ENCODING: [0x11,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8411 + +fmls za.d[w10, 0, vgx4], {z16.d - z19.d}, z4.d[1] // 11000001-11010100-11000110-00010000 +// CHECK-INST: fmls za.d[w10, 0, vgx4], { z16.d - z19.d }, z4.d[1] +// CHECK-ENCODING: [0x10,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c610 + +fmls za.d[w10, 0], {z16.d - z19.d}, z4.d[1] // 11000001-11010100-11000110-00010000 +// CHECK-INST: fmls za.d[w10, 0, vgx4], { z16.d - z19.d }, z4.d[1] +// CHECK-ENCODING: [0x10,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c610 + +fmls za.d[w8, 0, vgx4], {z12.d - z15.d}, 
z2.d[0] // 11000001-11010010-10000001-10010000 +// CHECK-INST: fmls za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d[0] +// CHECK-ENCODING: [0x90,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28190 + +fmls za.d[w8, 0], {z12.d - z15.d}, z2.d[0] // 11000001-11010010-10000001-10010000 +// CHECK-INST: fmls za.d[w8, 0, vgx4], { z12.d - z15.d }, z2.d[0] +// CHECK-ENCODING: [0x90,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28190 + +fmls za.d[w10, 1, vgx4], {z0.d - z3.d}, z10.d[0] // 11000001-11011010-11000000-00010001 +// CHECK-INST: fmls za.d[w10, 1, vgx4], { z0.d - z3.d }, z10.d[0] +// CHECK-ENCODING: [0x11,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac011 + +fmls za.d[w10, 1], {z0.d - z3.d}, z10.d[0] // 11000001-11011010-11000000-00010001 +// CHECK-INST: fmls za.d[w10, 1, vgx4], { z0.d - z3.d }, z10.d[0] +// CHECK-ENCODING: [0x11,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac011 + +fmls za.d[w8, 5, vgx4], {z20.d - z23.d}, z14.d[0] // 11000001-11011110-10000010-10010101 +// CHECK-INST: fmls za.d[w8, 5, vgx4], { z20.d - z23.d }, z14.d[0] +// CHECK-ENCODING: [0x95,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8295 + +fmls za.d[w8, 5], {z20.d - z23.d}, z14.d[0] // 11000001-11011110-10000010-10010101 +// CHECK-INST: fmls za.d[w8, 5, vgx4], { z20.d - z23.d }, z14.d[0] +// CHECK-ENCODING: [0x95,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8295 + +fmls za.d[w11, 2, vgx4], {z8.d - z11.d}, z1.d[1] // 11000001-11010001-11100101-00010010 +// CHECK-INST: fmls za.d[w11, 2, vgx4], { z8.d - z11.d }, z1.d[1] +// CHECK-ENCODING: [0x12,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e512 + +fmls za.d[w11, 2], {z8.d - z11.d}, z1.d[1] // 11000001-11010001-11100101-00010010 +// CHECK-INST: fmls za.d[w11, 2, vgx4], { z8.d - z11.d }, z1.d[1] 
+// CHECK-ENCODING: [0x12,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e512 + +fmls za.d[w9, 7, vgx4], {z12.d - z15.d}, z11.d[0] // 11000001-11011011-10100001-10010111 +// CHECK-INST: fmls za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d[0] +// CHECK-ENCODING: [0x97,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba197 + +fmls za.d[w9, 7], {z12.d - z15.d}, z11.d[0] // 11000001-11011011-10100001-10010111 +// CHECK-INST: fmls za.d[w9, 7, vgx4], { z12.d - z15.d }, z11.d[0] +// CHECK-ENCODING: [0x97,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba197 + + fmls za.d[w8, 0, vgx4], {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100001-00011000-00001000 // CHECK-INST: fmls za.d[w8, 0, vgx4], { z0.d - z3.d }, { z0.d - z3.d } // CHECK-ENCODING: [0x08,0x18,0xe1,0xc1] @@ -1027,6 +1462,151 @@ // CHECK-UNKNOWN: c13b398f +fmls za.s[w8, 0, vgx4], {z0.s - z3.s}, z0.s[0] // 11000001-01010000-10000000-00010000 +// CHECK-INST: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s[0] +// CHECK-ENCODING: [0x10,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508010 + +fmls za.s[w8, 0], {z0.s - z3.s}, z0.s[0] // 11000001-01010000-10000000-00010000 +// CHECK-INST: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, z0.s[0] +// CHECK-ENCODING: [0x10,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508010 + +fmls za.s[w10, 5, vgx4], {z8.s - z11.s}, z5.s[1] // 11000001-01010101-11000101-00010101 +// CHECK-INST: fmls za.s[w10, 5, vgx4], { z8.s - z11.s }, z5.s[1] +// CHECK-ENCODING: [0x15,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c515 + +fmls za.s[w10, 5], {z8.s - z11.s}, z5.s[1] // 11000001-01010101-11000101-00010101 +// CHECK-INST: fmls za.s[w10, 5, vgx4], { z8.s - z11.s }, z5.s[1] +// CHECK-ENCODING: [0x15,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c515 + +fmls za.s[w11, 
7, vgx4], {z12.s - z15.s}, z8.s[3] // 11000001-01011000-11101101-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx4], { z12.s - z15.s }, z8.s[3] +// CHECK-ENCODING: [0x97,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158ed97 + +fmls za.s[w11, 7], {z12.s - z15.s}, z8.s[3] // 11000001-01011000-11101101-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx4], { z12.s - z15.s }, z8.s[3] +// CHECK-ENCODING: [0x97,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158ed97 + +fmls za.s[w11, 7, vgx4], {z28.s - z31.s}, z15.s[3] // 11000001-01011111-11101111-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx4], { z28.s - z31.s }, z15.s[3] +// CHECK-ENCODING: [0x97,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fef97 + +fmls za.s[w11, 7], {z28.s - z31.s}, z15.s[3] // 11000001-01011111-11101111-10010111 +// CHECK-INST: fmls za.s[w11, 7, vgx4], { z28.s - z31.s }, z15.s[3] +// CHECK-ENCODING: [0x97,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fef97 + +fmls za.s[w8, 5, vgx4], {z16.s - z19.s}, z0.s[3] // 11000001-01010000-10001110-00010101 +// CHECK-INST: fmls za.s[w8, 5, vgx4], { z16.s - z19.s }, z0.s[3] +// CHECK-ENCODING: [0x15,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e15 + +fmls za.s[w8, 5], {z16.s - z19.s}, z0.s[3] // 11000001-01010000-10001110-00010101 +// CHECK-INST: fmls za.s[w8, 5, vgx4], { z16.s - z19.s }, z0.s[3] +// CHECK-ENCODING: [0x15,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e15 + +fmls za.s[w8, 1, vgx4], {z0.s - z3.s}, z14.s[1] // 11000001-01011110-10000100-00010001 +// CHECK-INST: fmls za.s[w8, 1, vgx4], { z0.s - z3.s }, z14.s[1] +// CHECK-ENCODING: [0x11,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8411 + +fmls za.s[w8, 1], {z0.s - z3.s}, z14.s[1] // 11000001-01011110-10000100-00010001 +// CHECK-INST: fmls za.s[w8, 1, 
vgx4], { z0.s - z3.s }, z14.s[1] +// CHECK-ENCODING: [0x11,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8411 + +fmls za.s[w10, 0, vgx4], {z16.s - z19.s}, z4.s[1] // 11000001-01010100-11000110-00010000 +// CHECK-INST: fmls za.s[w10, 0, vgx4], { z16.s - z19.s }, z4.s[1] +// CHECK-ENCODING: [0x10,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c610 + +fmls za.s[w10, 0], {z16.s - z19.s}, z4.s[1] // 11000001-01010100-11000110-00010000 +// CHECK-INST: fmls za.s[w10, 0, vgx4], { z16.s - z19.s }, z4.s[1] +// CHECK-ENCODING: [0x10,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c610 + +fmls za.s[w8, 0, vgx4], {z12.s - z15.s}, z2.s[2] // 11000001-01010010-10001001-10010000 +// CHECK-INST: fmls za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s[2] +// CHECK-ENCODING: [0x90,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1528990 + +fmls za.s[w8, 0], {z12.s - z15.s}, z2.s[2] // 11000001-01010010-10001001-10010000 +// CHECK-INST: fmls za.s[w8, 0, vgx4], { z12.s - z15.s }, z2.s[2] +// CHECK-ENCODING: [0x90,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1528990 + +fmls za.s[w10, 1, vgx4], {z0.s - z3.s}, z10.s[2] // 11000001-01011010-11001000-00010001 +// CHECK-INST: fmls za.s[w10, 1, vgx4], { z0.s - z3.s }, z10.s[2] +// CHECK-ENCODING: [0x11,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac811 + +fmls za.s[w10, 1], {z0.s - z3.s}, z10.s[2] // 11000001-01011010-11001000-00010001 +// CHECK-INST: fmls za.s[w10, 1, vgx4], { z0.s - z3.s }, z10.s[2] +// CHECK-ENCODING: [0x11,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac811 + +fmls za.s[w8, 5, vgx4], {z20.s - z23.s}, z14.s[2] // 11000001-01011110-10001010-10010101 +// CHECK-INST: fmls za.s[w8, 5, vgx4], { z20.s - z23.s }, z14.s[2] +// CHECK-ENCODING: [0x95,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c15e8a95 + +fmls za.s[w8, 5], {z20.s - z23.s}, z14.s[2] // 11000001-01011110-10001010-10010101 +// CHECK-INST: fmls za.s[w8, 5, vgx4], { z20.s - z23.s }, z14.s[2] +// CHECK-ENCODING: [0x95,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8a95 + +fmls za.s[w11, 2, vgx4], {z8.s - z11.s}, z1.s[1] // 11000001-01010001-11100101-00010010 +// CHECK-INST: fmls za.s[w11, 2, vgx4], { z8.s - z11.s }, z1.s[1] +// CHECK-ENCODING: [0x12,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e512 + +fmls za.s[w11, 2], {z8.s - z11.s}, z1.s[1] // 11000001-01010001-11100101-00010010 +// CHECK-INST: fmls za.s[w11, 2, vgx4], { z8.s - z11.s }, z1.s[1] +// CHECK-ENCODING: [0x12,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e512 + +fmls za.s[w9, 7, vgx4], {z12.s - z15.s}, z11.s[2] // 11000001-01011011-10101001-10010111 +// CHECK-INST: fmls za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s[2] +// CHECK-ENCODING: [0x97,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba997 + +fmls za.s[w9, 7], {z12.s - z15.s}, z11.s[2] // 11000001-01011011-10101001-10010111 +// CHECK-INST: fmls za.s[w9, 7, vgx4], { z12.s - z15.s }, z11.s[2] +// CHECK-ENCODING: [0x97,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba997 + + fmls za.s[w8, 0, vgx4], {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100001-00011000-00001000 // CHECK-INST: fmls za.s[w8, 0, vgx4], { z0.s - z3.s }, { z0.s - z3.s } // CHECK-ENCODING: [0x08,0x18,0xa1,0xc1] diff --git a/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid 
vector list + +frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frinta {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frinta {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frinta.s b/llvm/test/MC/AArch64/SME2/frinta.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frinta.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frinta {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101100-11100000-00000000 +// CHECK-INST: frinta { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xac,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1ace000 + +frinta {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101100-11100001-01010100 +// CHECK-INST: frinta { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace154 + +frinta {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101100-11100001-10010110 +// CHECK-INST: frinta { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace196 + +frinta {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101100-11100011-11011110 +// CHECK-INST: frinta { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xac,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ace3de + + +frinta {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111100-11100000-00000000 +// CHECK-INST: frinta { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce000 + +frinta {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111100-11100001-00010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce114 + +frinta {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111100-11100001-10010100 +// CHECK-INST: frinta { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce194 + +frinta {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-11100011-10011100 +// CHECK-INST: frinta { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bce39c + diff --git a/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2/frintm-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintm {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintm {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintm.s b/llvm/test/MC/AArch64/SME2/frintm.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintm.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: 
| FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintm {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101010-11100000-00000000 +// CHECK-INST: frintm { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae000 + +frintm {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101010-11100001-01010100 +// CHECK-INST: frintm { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae154 + +frintm {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101010-11100001-10010110 +// CHECK-INST: frintm { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae196 + +frintm {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101010-11100011-11011110 +// CHECK-INST: frintm { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aae3de + + +frintm {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111010-11100000-00000000 +// CHECK-INST: frintm { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae000 + +frintm {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111010-11100001-00010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae114 + +frintm {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111010-11100001-10010100 +// CHECK-INST: frintm { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae194 + +frintm {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111010-11100011-10011100 +// CHECK-INST: frintm { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: 
[0x9c,0xe3,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bae39c + diff --git a/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintn {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintn {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintn {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintn.s b/llvm/test/MC/AArch64/SME2/frintn.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintn.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | 
FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintn {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101000-11100000-00000000 +// CHECK-INST: frintn { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e000 + +frintn {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101000-11100001-01010100 +// CHECK-INST: frintn { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e154 + +frintn {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101000-11100001-10010110 +// CHECK-INST: frintn { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e196 + +frintn {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101000-11100011-11011110 +// CHECK-INST: frintn { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8e3de + + +frintn {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111000-11100000-00000000 +// CHECK-INST: frintn { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e000 + +frintn {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111000-11100001-00010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e114 + +frintn {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111000-11100001-10010100 +// CHECK-INST: frintn { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: 
[0x94,0xe1,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e194 + +frintn {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111000-11100011-10011100 +// CHECK-INST: frintn { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b8e39c + diff --git a/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z0.s-z2.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: frintp {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: frintp {z0.s-z1.s}, {z2.d-z3.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/frintp.s b/llvm/test/MC/AArch64/SME2/frintp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/frintp.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR 
+// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +frintp {z0.s - z1.s}, {z0.s - z1.s} // 11000001-10101001-11100000-00000000 +// CHECK-INST: frintp { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e000 + +frintp {z20.s - z21.s}, {z10.s - z11.s} // 11000001-10101001-11100001-01010100 +// CHECK-INST: frintp { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e154 + +frintp {z22.s - z23.s}, {z12.s - z13.s} // 11000001-10101001-11100001-10010110 +// CHECK-INST: frintp { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e196 + +frintp {z30.s - z31.s}, {z30.s - z31.s} // 11000001-10101001-11100011-11011110 +// CHECK-INST: frintp { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9e3de + + +frintp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10111001-11100000-00000000 +// CHECK-INST: frintp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e000 + +frintp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10111001-11100001-00010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb9,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e114 + +frintp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10111001-11100001-10010100 +// CHECK-INST: frintp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e194 + +frintp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111001-11100011-10011100 +// CHECK-INST: frintp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b9e39c + diff --git a/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fvdot-diagnostics.s @@ -0,0 +1,69 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector select register + +fvdot za.s[w7, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: fvdot za.s[w7, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fvdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: fvdot za.s[w12, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +fvdot za.s[w8, -1, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: fvdot za.s[w8, -1, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fvdot za.s[w8, 8, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. 
+// CHECK-NEXT: fvdot za.s[w8, 8, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector list + +fvdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fvdot za.s[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fvdot za.s[w8, 0, vgx2], {z1.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: fvdot za.s[w8, 0, vgx2], {z1.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Matrix Operand + +fvdot za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: fvdot za.h[w8, 0, vgx2], {z0.h-z2.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector grouping + +fvdot za.s[w8, 0, vgx4], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: fvdot za.s[w8, 0, vgx4], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid lane index + +fvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: fvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +fvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: fvdot za.s[w8, 0, vgx2], {z0.h-z1.h}, z0.h[-1] +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/fvdot.s b/llvm/test/MC/AArch64/SME2/fvdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/fvdot.s @@ -0,0 +1,158 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +fvdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00001000 +// CHECK-INST: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500008 + +fvdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00001000 +// CHECK-INST: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500008 + +fvdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01001101 +// CHECK-INST: fvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x4d,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155454d + +fvdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01001101 +// CHECK-INST: fvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: 
[0x4d,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155454d + +fvdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10001111 +// CHECK-INST: fvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d8f + +fvdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10001111 +// CHECK-INST: fvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x8f,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586d8f + +fvdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11001111 +// CHECK-INST: fvdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xcf,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fcf + +fvdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11001111 +// CHECK-INST: fvdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xcf,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fcf + +fvdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00001101 +// CHECK-INST: fvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e0d + +fvdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00001101 +// CHECK-INST: fvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x0d,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e0d + +fvdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00001001 +// CHECK-INST: fvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0409 + +fvdot 
za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00001001 +// CHECK-INST: fvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0409 + +fvdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01001000 +// CHECK-INST: fvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544648 + +fvdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01001000 +// CHECK-INST: fvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544648 + +fvdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10001000 +// CHECK-INST: fvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520988 + +fvdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10001000 +// CHECK-INST: fvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x88,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1520988 + +fvdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00001001 +// CHECK-INST: fvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4809 + +fvdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00001001 +// CHECK-INST: fvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x09,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4809 + +fvdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11001101 +// CHECK-INST: fvdot za.s[w8, 5, 
vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0acd + +fvdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11001101 +// CHECK-INST: fvdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0acd + +fvdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00001010 +// CHECK-INST: fvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151650a + +fvdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00001010 +// CHECK-INST: fvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151650a + +fvdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10001111 +// CHECK-INST: fvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b298f + +fvdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10001111 +// CHECK-INST: fvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x8f,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b298f + diff --git a/llvm/test/MC/AArch64/SME2/ld1b.s b/llvm/test/MC/AArch64/SME2/ld1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ld1b.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | 
llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1b {z0.b, z8.b}, pn8/z, [x0, x0] // 10100001-00000000-00000000-00000000 +// CHECK-INST: ld1b { z0.b, z8.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1000000 + +ld1b {z21.b, z29.b}, pn13/z, [x10, x21] // 10100001-00010101-00010101-01010101 +// CHECK-INST: ld1b { z21.b, z29.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x55,0x15,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1151555 + +ld1b {z23.b, z31.b}, pn11/z, [x13, x8] // 10100001-00001000-00001101-10110111 +// CHECK-INST: ld1b { z23.b, z31.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb7,0x0d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1080db7 + +ld1b {z23.b, z31.b}, pn15/z, [sp, xzr] // 10100001-00011111-00011111-11110111 +// CHECK-INST: ld1b { z23.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xf7,0x1f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f1ff7 + + +ld1b {z0.b, z8.b}, pn8/z, [x0] // 10100001-01000000-00000000-00000000 +// CHECK-INST: ld1b { z0.b, z8.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x00,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1400000 + +ld1b {z21.b, z29.b}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-00010101-01010101 +// CHECK-INST: ld1b { z21.b, z29.b }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x15,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1451555 + +ld1b 
{z23.b, z31.b}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-00001101-10110111 +// CHECK-INST: ld1b { z23.b, z31.b }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x0d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1480db7 + +ld1b {z23.b, z31.b}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-00011111-11110111 +// CHECK-INST: ld1b { z23.b, z31.b }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x1f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f1ff7 + + +ld1b {z0.b, z4.b, z8.b, z12.b}, pn8/z, [x0, x0] // 10100001-00000000-10000000-00000000 +// CHECK-INST: ld1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1008000 + +ld1b {z17.b, z21.b, z25.b, z29.b}, pn13/z, [x10, x21] // 10100001-00010101-10010101-01010001 +// CHECK-INST: ld1b { z17.b, z21.b, z25.b, z29.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x51,0x95,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1159551 + +ld1b {z19.b, z23.b, z27.b, z31.b}, pn11/z, [x13, x8] // 10100001-00001000-10001101-10110011 +// CHECK-INST: ld1b { z19.b, z23.b, z27.b, z31.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb3,0x8d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1088db3 + +ld1b {z19.b, z23.b, z27.b, z31.b}, pn15/z, [sp, xzr] // 10100001-00011111-10011111-11110011 +// CHECK-INST: ld1b { z19.b, z23.b, z27.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xf3,0x9f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f9ff3 + + +ld1b {z0.b, z4.b, z8.b, z12.b}, pn8/z, [x0] // 10100001-01000000-10000000-00000000 +// CHECK-INST: ld1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x80,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1408000 + +ld1b {z17.b, z21.b, z25.b, z29.b}, pn13/z, [x10, #20, mul vl] // 
10100001-01000101-10010101-01010001 +// CHECK-INST: ld1b { z17.b, z21.b, z25.b, z29.b }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0x95,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1459551 + +ld1b {z19.b, z23.b, z27.b, z31.b}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-10001101-10110011 +// CHECK-INST: ld1b { z19.b, z23.b, z27.b, z31.b }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0x8d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1488db3 + +ld1b {z19.b, z23.b, z27.b, z31.b}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-10011111-11110011 +// CHECK-INST: ld1b { z19.b, z23.b, z27.b, z31.b }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0x9f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f9ff3 + diff --git a/llvm/test/MC/AArch64/SME2/ld1d.s b/llvm/test/MC/AArch64/SME2/ld1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ld1d.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1d {z0.d, z8.d}, pn8/z, [x0, x0, lsl #3] // 10100001-00000000-01100000-00000000 +// CHECK-INST: ld1d { z0.d, z8.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x60,0x00,0xa1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1006000 + +ld1d {z21.d, z29.d}, pn13/z, [x10, x21, lsl #3] // 10100001-00010101-01110101-01010101 +// CHECK-INST: ld1d { z21.d, z29.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x75,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1157555 + +ld1d {z23.d, z31.d}, pn11/z, [x13, x8, lsl #3] // 10100001-00001000-01101101-10110111 +// CHECK-INST: ld1d { z23.d, z31.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x6d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1086db7 + +ld1d {z23.d, z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100001-00011111-01111111-11110111 +// CHECK-INST: ld1d { z23.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xf7,0x7f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f7ff7 + + +ld1d {z0.d, z8.d}, pn8/z, [x0] // 10100001-01000000-01100000-00000000 +// CHECK-INST: ld1d { z0.d, z8.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x60,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1406000 + +ld1d {z21.d, z29.d}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-01110101-01010101 +// CHECK-INST: ld1d { z21.d, z29.d }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x75,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1457555 + +ld1d {z23.d, z31.d}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-01101101-10110111 +// CHECK-INST: ld1d { z23.d, z31.d }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x6d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1486db7 + +ld1d {z23.d, z31.d}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-01111111-11110111 +// CHECK-INST: ld1d { z23.d, z31.d }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x7f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f7ff7 + + +ld1d {z0.d, z4.d, z8.d, z12.d}, pn8/z, [x0, x0, lsl #3] // 
10100001-00000000-11100000-00000000 +// CHECK-INST: ld1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a100e000 + +ld1d {z17.d, z21.d, z25.d, z29.d}, pn13/z, [x10, x21, lsl #3] // 10100001-00010101-11110101-01010001 +// CHECK-INST: ld1d { z17.d, z21.d, z25.d, z29.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x51,0xf5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115f551 + +ld1d {z19.d, z23.d, z27.d, z31.d}, pn11/z, [x13, x8, lsl #3] // 10100001-00001000-11101101-10110011 +// CHECK-INST: ld1d { z19.d, z23.d, z27.d, z31.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb3,0xed,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108edb3 + +ld1d {z19.d, z23.d, z27.d, z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100001-00011111-11111111-11110011 +// CHECK-INST: ld1d { z19.d, z23.d, z27.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xf3,0xff,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11ffff3 + + +ld1d {z0.d, z4.d, z8.d, z12.d}, pn8/z, [x0] // 10100001-01000000-11100000-00000000 +// CHECK-INST: ld1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140e000 + +ld1d {z17.d, z21.d, z25.d, z29.d}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-11110101-01010001 +// CHECK-INST: ld1d { z17.d, z21.d, z25.d, z29.d }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xf5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145f551 + +ld1d {z19.d, z23.d, z27.d, z31.d}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-11101101-10110011 +// CHECK-INST: ld1d { z19.d, z23.d, z27.d, z31.d }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xed,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148edb3 + +ld1d {z19.d, z23.d, 
z27.d, z31.d}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-11111111-11110011 +// CHECK-INST: ld1d { z19.d, z23.d, z27.d, z31.d }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xff,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14ffff3 + diff --git a/llvm/test/MC/AArch64/SME2/ld1h.s b/llvm/test/MC/AArch64/SME2/ld1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ld1h.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1h {z0.h, z8.h}, pn8/z, [x0, x0, lsl #1] // 10100001-00000000-00100000-00000000 +// CHECK-INST: ld1h { z0.h, z8.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0x20,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1002000 + +ld1h {z21.h, z29.h}, pn13/z, [x10, x21, lsl #1] // 10100001-00010101-00110101-01010101 +// CHECK-INST: ld1h { z21.h, z29.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0x35,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1153555 + +ld1h {z23.h, z31.h}, pn11/z, [x13, x8, lsl #1] // 10100001-00001000-00101101-10110111 +// CHECK-INST: ld1h { z23.h, z31.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb7,0x2d,0x08,0xa1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1082db7 + +ld1h {z23.h, z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100001-00011111-00111111-11110111 +// CHECK-INST: ld1h { z23.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xf7,0x3f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f3ff7 + + +ld1h {z0.h, z8.h}, pn8/z, [x0] // 10100001-01000000-00100000-00000000 +// CHECK-INST: ld1h { z0.h, z8.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1402000 + +ld1h {z21.h, z29.h}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-00110101-01010101 +// CHECK-INST: ld1h { z21.h, z29.h }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1453555 + +ld1h {z23.h, z31.h}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-00101101-10110111 +// CHECK-INST: ld1h { z23.h, z31.h }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1482db7 + +ld1h {z23.h, z31.h}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-00111111-11110111 +// CHECK-INST: ld1h { z23.h, z31.h }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x3f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f3ff7 + + +ld1h {z0.h, z4.h, z8.h, z12.h}, pn8/z, [x0, x0, lsl #1] // 10100001-00000000-10100000-00000000 +// CHECK-INST: ld1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0xa0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a100a000 + +ld1h {z17.h, z21.h, z25.h, z29.h}, pn13/z, [x10, x21, lsl #1] // 10100001-00010101-10110101-01010001 +// CHECK-INST: ld1h { z17.h, z21.h, z25.h, z29.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x51,0xb5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115b551 + +ld1h {z19.h, z23.h, z27.h, 
z31.h}, pn11/z, [x13, x8, lsl #1] // 10100001-00001000-10101101-10110011 +// CHECK-INST: ld1h { z19.h, z23.h, z27.h, z31.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb3,0xad,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108adb3 + +ld1h {z19.h, z23.h, z27.h, z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100001-00011111-10111111-11110011 +// CHECK-INST: ld1h { z19.h, z23.h, z27.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xf3,0xbf,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11fbff3 + + +ld1h {z0.h, z4.h, z8.h, z12.h}, pn8/z, [x0] // 10100001-01000000-10100000-00000000 +// CHECK-INST: ld1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xa0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140a000 + +ld1h {z17.h, z21.h, z25.h, z29.h}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-10110101-01010001 +// CHECK-INST: ld1h { z17.h, z21.h, z25.h, z29.h }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xb5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145b551 + +ld1h {z19.h, z23.h, z27.h, z31.h}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-10101101-10110011 +// CHECK-INST: ld1h { z19.h, z23.h, z27.h, z31.h }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xad,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148adb3 + +ld1h {z19.h, z23.h, z27.h, z31.h}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-10111111-11110011 +// CHECK-INST: ld1h { z19.h, z23.h, z27.h, z31.h }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xbf,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14fbff3 + diff --git a/llvm/test/MC/AArch64/SME2/ld1w.s b/llvm/test/MC/AArch64/SME2/ld1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ld1w.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ld1w {z0.s, z8.s}, pn8/z, [x0, x0, lsl #2] // 10100001-00000000-01000000-00000000 +// CHECK-INST: ld1w { z0.s, z8.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1004000 + +ld1w {z21.s, z29.s}, pn13/z, [x10, x21, lsl #2] // 10100001-00010101-01010101-01010101 +// CHECK-INST: ld1w { z21.s, z29.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1155555 + +ld1w {z23.s, z31.s}, pn11/z, [x13, x8, lsl #2] // 10100001-00001000-01001101-10110111 +// CHECK-INST: ld1w { z23.s, z31.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1084db7 + +ld1w {z23.s, z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100001-00011111-01011111-11110111 +// CHECK-INST: ld1w { z23.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xf7,0x5f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f5ff7 + + +ld1w {z0.s, z8.s}, pn8/z, [x0] // 10100001-01000000-01000000-00000000 +// CHECK-INST: ld1w { z0.s, z8.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x40,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: a1404000 + +ld1w {z21.s, z29.s}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-01010101-01010101 +// CHECK-INST: ld1w { z21.s, z29.s }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x55,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1455555 + +ld1w {z23.s, z31.s}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-01001101-10110111 +// CHECK-INST: ld1w { z23.s, z31.s }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x4d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1484db7 + +ld1w {z23.s, z31.s}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-01011111-11110111 +// CHECK-INST: ld1w { z23.s, z31.s }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x5f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f5ff7 + + +ld1w {z0.s, z4.s, z8.s, z12.s}, pn8/z, [x0, x0, lsl #2] // 10100001-00000000-11000000-00000000 +// CHECK-INST: ld1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0xc0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a100c000 + +ld1w {z17.s, z21.s, z25.s, z29.s}, pn13/z, [x10, x21, lsl #2] // 10100001-00010101-11010101-01010001 +// CHECK-INST: ld1w { z17.s, z21.s, z25.s, z29.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x51,0xd5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115d551 + +ld1w {z19.s, z23.s, z27.s, z31.s}, pn11/z, [x13, x8, lsl #2] // 10100001-00001000-11001101-10110011 +// CHECK-INST: ld1w { z19.s, z23.s, z27.s, z31.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb3,0xcd,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108cdb3 + +ld1w {z19.s, z23.s, z27.s, z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100001-00011111-11011111-11110011 +// CHECK-INST: ld1w { z19.s, z23.s, z27.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xf3,0xdf,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: a11fdff3 + + +ld1w {z0.s, z4.s, z8.s, z12.s}, pn8/z, [x0] // 10100001-01000000-11000000-00000000 +// CHECK-INST: ld1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xc0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140c000 + +ld1w {z17.s, z21.s, z25.s, z29.s}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-11010101-01010001 +// CHECK-INST: ld1w { z17.s, z21.s, z25.s, z29.s }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xd5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145d551 + +ld1w {z19.s, z23.s, z27.s, z31.s}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-11001101-10110011 +// CHECK-INST: ld1w { z19.s, z23.s, z27.s, z31.s }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xcd,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148cdb3 + +ld1w {z19.s, z23.s, z27.s, z31.s}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-11011111-11110011 +// CHECK-INST: ld1w { z19.s, z23.s, z27.s, z31.s }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xdf,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14fdff3 + diff --git a/llvm/test/MC/AArch64/SME2/ldnt1b.s b/llvm/test/MC/AArch64/SME2/ldnt1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldnt1b.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | 
sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldnt1b {z0.b, z8.b}, pn8/z, [x0, x0] // 10100001-00000000-00000000-00001000 +// CHECK-INST: ldnt1b { z0.b, z8.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x08,0x00,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1000008 + +ldnt1b {z21.b, z29.b}, pn13/z, [x10, x21] // 10100001-00010101-00010101-01011101 +// CHECK-INST: ldnt1b { z21.b, z29.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x5d,0x15,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115155d + +ldnt1b {z23.b, z31.b}, pn11/z, [x13, x8] // 10100001-00001000-00001101-10111111 +// CHECK-INST: ldnt1b { z23.b, z31.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xbf,0x0d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1080dbf + +ldnt1b {z23.b, z31.b}, pn15/z, [sp, xzr] // 10100001-00011111-00011111-11111111 +// CHECK-INST: ldnt1b { z23.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xff,0x1f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f1fff + + +ldnt1b {z0.b, z8.b}, pn8/z, [x0] // 10100001-01000000-00000000-00001000 +// CHECK-INST: ldnt1b { z0.b, z8.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0x00,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1400008 + +ldnt1b {z21.b, z29.b}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-00010101-01011101 +// CHECK-INST: ldnt1b { z21.b, z29.b }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x15,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145155d + +ldnt1b {z23.b, z31.b}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-00001101-10111111 +// CHECK-INST: ldnt1b { z23.b, z31.b }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x0d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1480dbf + +ldnt1b 
{z23.b, z31.b}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-00011111-11111111 +// CHECK-INST: ldnt1b { z23.b, z31.b }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x1f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f1fff + + +ldnt1b {z0.b, z4.b, z8.b, z12.b}, pn8/z, [x0, x0] // 10100001-00000000-10000000-00001000 +// CHECK-INST: ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x08,0x80,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1008008 + +ldnt1b {z17.b, z21.b, z25.b, z29.b}, pn13/z, [x10, x21] // 10100001-00010101-10010101-01011001 +// CHECK-INST: ldnt1b { z17.b, z21.b, z25.b, z29.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x59,0x95,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1159559 + +ldnt1b {z19.b, z23.b, z27.b, z31.b}, pn11/z, [x13, x8] // 10100001-00001000-10001101-10111011 +// CHECK-INST: ldnt1b { z19.b, z23.b, z27.b, z31.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xbb,0x8d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1088dbb + +ldnt1b {z19.b, z23.b, z27.b, z31.b}, pn15/z, [sp, xzr] // 10100001-00011111-10011111-11111011 +// CHECK-INST: ldnt1b { z19.b, z23.b, z27.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xfb,0x9f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f9ffb + + +ldnt1b {z0.b, z4.b, z8.b, z12.b}, pn8/z, [x0] // 10100001-01000000-10000000-00001000 +// CHECK-INST: ldnt1b { z0.b, z4.b, z8.b, z12.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0x80,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1408008 + +ldnt1b {z17.b, z21.b, z25.b, z29.b}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-10010101-01011001 +// CHECK-INST: ldnt1b { z17.b, z21.b, z25.b, z29.b }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0x95,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1459559 + +ldnt1b {z19.b, z23.b, z27.b, 
z31.b}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-10001101-10111011 +// CHECK-INST: ldnt1b { z19.b, z23.b, z27.b, z31.b }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0x8d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1488dbb + +ldnt1b {z19.b, z23.b, z27.b, z31.b}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-10011111-11111011 +// CHECK-INST: ldnt1b { z19.b, z23.b, z27.b, z31.b }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0x9f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f9ffb + diff --git a/llvm/test/MC/AArch64/SME2/ldnt1d.s b/llvm/test/MC/AArch64/SME2/ldnt1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldnt1d.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldnt1d {z0.d, z8.d}, pn8/z, [x0, x0, lsl #3] // 10100001-00000000-01100000-00001000 +// CHECK-INST: ldnt1d { z0.d, z8.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x08,0x60,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1006008 + +ldnt1d {z21.d, z29.d}, pn13/z, [x10, x21, lsl #3] // 10100001-00010101-01110101-01011101 +// CHECK-INST: ldnt1d { z21.d, z29.d }, pn13/z, [x10, x21, lsl #3] +// 
CHECK-ENCODING: [0x5d,0x75,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115755d + +ldnt1d {z23.d, z31.d}, pn11/z, [x13, x8, lsl #3] // 10100001-00001000-01101101-10111111 +// CHECK-INST: ldnt1d { z23.d, z31.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xbf,0x6d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1086dbf + +ldnt1d {z23.d, z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100001-00011111-01111111-11111111 +// CHECK-INST: ldnt1d { z23.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xff,0x7f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f7fff + + +ldnt1d {z0.d, z8.d}, pn8/z, [x0] // 10100001-01000000-01100000-00001000 +// CHECK-INST: ldnt1d { z0.d, z8.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0x60,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1406008 + +ldnt1d {z21.d, z29.d}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-01110101-01011101 +// CHECK-INST: ldnt1d { z21.d, z29.d }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x75,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145755d + +ldnt1d {z23.d, z31.d}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-01101101-10111111 +// CHECK-INST: ldnt1d { z23.d, z31.d }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x6d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1486dbf + +ldnt1d {z23.d, z31.d}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-01111111-11111111 +// CHECK-INST: ldnt1d { z23.d, z31.d }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x7f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f7fff + + +ldnt1d {z0.d, z4.d, z8.d, z12.d}, pn8/z, [x0, x0, lsl #3] // 10100001-00000000-11100000-00001000 +// CHECK-INST: ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x08,0xe0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: a100e008 + +ldnt1d {z17.d, z21.d, z25.d, z29.d}, pn13/z, [x10, x21, lsl #3] // 10100001-00010101-11110101-01011001 +// CHECK-INST: ldnt1d { z17.d, z21.d, z25.d, z29.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x59,0xf5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115f559 + +ldnt1d {z19.d, z23.d, z27.d, z31.d}, pn11/z, [x13, x8, lsl #3] // 10100001-00001000-11101101-10111011 +// CHECK-INST: ldnt1d { z19.d, z23.d, z27.d, z31.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xbb,0xed,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108edbb + +ldnt1d {z19.d, z23.d, z27.d, z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100001-00011111-11111111-11111011 +// CHECK-INST: ldnt1d { z19.d, z23.d, z27.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfb,0xff,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11ffffb + + +ldnt1d {z0.d, z4.d, z8.d, z12.d}, pn8/z, [x0] // 10100001-01000000-11100000-00001000 +// CHECK-INST: ldnt1d { z0.d, z4.d, z8.d, z12.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0xe0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140e008 + +ldnt1d {z17.d, z21.d, z25.d, z29.d}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-11110101-01011001 +// CHECK-INST: ldnt1d { z17.d, z21.d, z25.d, z29.d }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xf5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145f559 + +ldnt1d {z19.d, z23.d, z27.d, z31.d}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-11101101-10111011 +// CHECK-INST: ldnt1d { z19.d, z23.d, z27.d, z31.d }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0xed,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148edbb + +ldnt1d {z19.d, z23.d, z27.d, z31.d}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-11111111-11111011 +// CHECK-INST: ldnt1d { z19.d, z23.d, z27.d, z31.d }, pn15/z, [sp, #-4, mul vl] +// 
CHECK-ENCODING: [0xfb,0xff,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14ffffb + diff --git a/llvm/test/MC/AArch64/SME2/ldnt1h.s b/llvm/test/MC/AArch64/SME2/ldnt1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldnt1h.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldnt1h {z0.h, z8.h}, pn8/z, [x0, x0, lsl #1] // 10100001-00000000-00100000-00001000 +// CHECK-INST: ldnt1h { z0.h, z8.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x08,0x20,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1002008 + +ldnt1h {z21.h, z29.h}, pn13/z, [x10, x21, lsl #1] // 10100001-00010101-00110101-01011101 +// CHECK-INST: ldnt1h { z21.h, z29.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x5d,0x35,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115355d + +ldnt1h {z23.h, z31.h}, pn11/z, [x13, x8, lsl #1] // 10100001-00001000-00101101-10111111 +// CHECK-INST: ldnt1h { z23.h, z31.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xbf,0x2d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1082dbf + +ldnt1h {z23.h, z31.h}, pn15/z, [sp, xzr, lsl #1] // 
10100001-00011111-00111111-11111111 +// CHECK-INST: ldnt1h { z23.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xff,0x3f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f3fff + + +ldnt1h {z0.h, z8.h}, pn8/z, [x0] // 10100001-01000000-00100000-00001000 +// CHECK-INST: ldnt1h { z0.h, z8.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0x20,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1402008 + +ldnt1h {z21.h, z29.h}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-00110101-01011101 +// CHECK-INST: ldnt1h { z21.h, z29.h }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x35,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145355d + +ldnt1h {z23.h, z31.h}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-00101101-10111111 +// CHECK-INST: ldnt1h { z23.h, z31.h }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x2d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1482dbf + +ldnt1h {z23.h, z31.h}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-00111111-11111111 +// CHECK-INST: ldnt1h { z23.h, z31.h }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f3fff + + +ldnt1h {z0.h, z4.h, z8.h, z12.h}, pn8/z, [x0, x0, lsl #1] // 10100001-00000000-10100000-00001000 +// CHECK-INST: ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x08,0xa0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a100a008 + +ldnt1h {z17.h, z21.h, z25.h, z29.h}, pn13/z, [x10, x21, lsl #1] // 10100001-00010101-10110101-01011001 +// CHECK-INST: ldnt1h { z17.h, z21.h, z25.h, z29.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x59,0xb5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115b559 + +ldnt1h {z19.h, z23.h, z27.h, z31.h}, pn11/z, [x13, x8, lsl #1] // 10100001-00001000-10101101-10111011 +// CHECK-INST: 
ldnt1h { z19.h, z23.h, z27.h, z31.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xbb,0xad,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108adbb + +ldnt1h {z19.h, z23.h, z27.h, z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100001-00011111-10111111-11111011 +// CHECK-INST: ldnt1h { z19.h, z23.h, z27.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfb,0xbf,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11fbffb + + +ldnt1h {z0.h, z4.h, z8.h, z12.h}, pn8/z, [x0] // 10100001-01000000-10100000-00001000 +// CHECK-INST: ldnt1h { z0.h, z4.h, z8.h, z12.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0xa0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140a008 + +ldnt1h {z17.h, z21.h, z25.h, z29.h}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-10110101-01011001 +// CHECK-INST: ldnt1h { z17.h, z21.h, z25.h, z29.h }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xb5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145b559 + +ldnt1h {z19.h, z23.h, z27.h, z31.h}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-10101101-10111011 +// CHECK-INST: ldnt1h { z19.h, z23.h, z27.h, z31.h }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0xad,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148adbb + +ldnt1h {z19.h, z23.h, z27.h, z31.h}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-10111111-11111011 +// CHECK-INST: ldnt1h { z19.h, z23.h, z27.h, z31.h }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0xbf,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14fbffb + diff --git a/llvm/test/MC/AArch64/SME2/ldnt1w.s b/llvm/test/MC/AArch64/SME2/ldnt1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldnt1w.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc 
-triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldnt1w {z0.s, z8.s}, pn8/z, [x0, x0, lsl #2] // 10100001-00000000-01000000-00001000 +// CHECK-INST: ldnt1w { z0.s, z8.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x08,0x40,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1004008 + +ldnt1w {z21.s, z29.s}, pn13/z, [x10, x21, lsl #2] // 10100001-00010101-01010101-01011101 +// CHECK-INST: ldnt1w { z21.s, z29.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x5d,0x55,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115555d + +ldnt1w {z23.s, z31.s}, pn11/z, [x13, x8, lsl #2] // 10100001-00001000-01001101-10111111 +// CHECK-INST: ldnt1w { z23.s, z31.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xbf,0x4d,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1084dbf + +ldnt1w {z23.s, z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100001-00011111-01011111-11111111 +// CHECK-INST: ldnt1w { z23.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xff,0x5f,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11f5fff + + +ldnt1w {z0.s, z8.s}, pn8/z, [x0] // 10100001-01000000-01000000-00001000 +// CHECK-INST: ldnt1w { z0.s, z8.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0x40,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1404008 + +ldnt1w {z21.s, 
z29.s}, pn13/z, [x10, #10, mul vl] // 10100001-01000101-01010101-01011101 +// CHECK-INST: ldnt1w { z21.s, z29.s }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x55,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145555d + +ldnt1w {z23.s, z31.s}, pn11/z, [x13, #-16, mul vl] // 10100001-01001000-01001101-10111111 +// CHECK-INST: ldnt1w { z23.s, z31.s }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x4d,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1484dbf + +ldnt1w {z23.s, z31.s}, pn15/z, [sp, #-2, mul vl] // 10100001-01001111-01011111-11111111 +// CHECK-INST: ldnt1w { z23.s, z31.s }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x5f,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14f5fff + + +ldnt1w {z0.s, z4.s, z8.s, z12.s}, pn8/z, [x0, x0, lsl #2] // 10100001-00000000-11000000-00001000 +// CHECK-INST: ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x08,0xc0,0x00,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a100c008 + +ldnt1w {z17.s, z21.s, z25.s, z29.s}, pn13/z, [x10, x21, lsl #2] // 10100001-00010101-11010101-01011001 +// CHECK-INST: ldnt1w { z17.s, z21.s, z25.s, z29.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x59,0xd5,0x15,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a115d559 + +ldnt1w {z19.s, z23.s, z27.s, z31.s}, pn11/z, [x13, x8, lsl #2] // 10100001-00001000-11001101-10111011 +// CHECK-INST: ldnt1w { z19.s, z23.s, z27.s, z31.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xbb,0xcd,0x08,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a108cdbb + +ldnt1w {z19.s, z23.s, z27.s, z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100001-00011111-11011111-11111011 +// CHECK-INST: ldnt1w { z19.s, z23.s, z27.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfb,0xdf,0x1f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a11fdffb + 
+ +ldnt1w {z0.s, z4.s, z8.s, z12.s}, pn8/z, [x0] // 10100001-01000000-11000000-00001000 +// CHECK-INST: ldnt1w { z0.s, z4.s, z8.s, z12.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x08,0xc0,0x40,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a140c008 + +ldnt1w {z17.s, z21.s, z25.s, z29.s}, pn13/z, [x10, #20, mul vl] // 10100001-01000101-11010101-01011001 +// CHECK-INST: ldnt1w { z17.s, z21.s, z25.s, z29.s }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xd5,0x45,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a145d559 + +ldnt1w {z19.s, z23.s, z27.s, z31.s}, pn11/z, [x13, #-32, mul vl] // 10100001-01001000-11001101-10111011 +// CHECK-INST: ldnt1w { z19.s, z23.s, z27.s, z31.s }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0xcd,0x48,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a148cdbb + +ldnt1w {z19.s, z23.s, z27.s, z31.s}, pn15/z, [sp, #-4, mul vl] // 10100001-01001111-11011111-11111011 +// CHECK-INST: ldnt1w { z19.s, z23.s, z27.s, z31.s }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0xdf,0x4f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a14fdffb + diff --git a/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +ldr zt1, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be zt0 register +// CHECK-NEXT: ldr zt1, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/ldr.s b/llvm/test/MC/AArch64/SME2/ldr.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ldr.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// 
RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ldr zt0, [x0] // 11100001-00011111-10000000-00000000 +// CHECK-INST: ldr zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8000 + +ldr zt0, [x10] // 11100001-00011111-10000001-01000000 +// CHECK-INST: ldr zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f8140 + +ldr zt0, [x13] // 11100001-00011111-10000001-10100000 +// CHECK-INST: ldr zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f81a0 + +ldr zt0, [sp] // 11100001-00011111-10000011-11100000 +// CHECK-INST: ldr zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x1f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e11f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid lane indices + +luti2 z0.h, zt0, z0[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. 
+// CHECK-NEXT: luti2 z0.h, zt0, z0[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: luti2 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.b-z1.b}, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti2 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti2 {z0.s-z3.s}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: luti2 {z0.b-z3.b}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti2 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti2 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti2.s b/llvm/test/MC/AArch64/SME2/luti2.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti2.s @@ -0,0 +1,238 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s 
--check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti2 z0.h, zt0, z0[0] // 11000000-11001100-00010000-00000000 +// CHECK-INST: luti2 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc1000 + +luti2 z21.h, zt0, z10[5] // 11000000-11001101-01010001-01010101 +// CHECK-INST: luti2 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd5155 + +luti2 z23.h, zt0, z13[3] // 11000000-11001100-11010001-10110111 +// CHECK-INST: luti2 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccd1b7 + +luti2 z31.h, zt0, z31[15] // 11000000-11001111-11010011-11111111 +// CHECK-INST: luti2 z31.h, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xd3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfd3ff + + +luti2 z0.s, zt0, z0[0] // 11000000-11001100-00100000-00000000 +// CHECK-INST: luti2 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc2000 + +luti2 z21.s, zt0, z10[5] // 11000000-11001101-01100001-01010101 +// CHECK-INST: luti2 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd6155 + +luti2 z23.s, zt0, z13[3] // 11000000-11001100-11100001-10110111 +// CHECK-INST: luti2 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cce1b7 + +luti2 z31.s, zt0, z31[15] // 11000000-11001111-11100011-11111111 +// CHECK-INST: luti2 z31.s, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xe3,0xcf,0xc0] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfe3ff + + +luti2 z0.b, zt0, z0[0] // 11000000-11001100-00000000-00000000 +// CHECK-INST: luti2 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cc0000 + +luti2 z21.b, zt0, z10[5] // 11000000-11001101-01000001-01010101 +// CHECK-INST: luti2 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcd,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cd4155 + +luti2 z23.b, zt0, z13[3] // 11000000-11001100-11000001-10110111 +// CHECK-INST: luti2 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xcc,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ccc1b7 + +luti2 z31.b, zt0, z31[15] // 11000000-11001111-11000011-11111111 +// CHECK-INST: luti2 z31.b, zt0, z31[15] +// CHECK-ENCODING: [0xff,0xc3,0xcf,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cfc3ff + + +luti2 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001100-01010000-00000000 +// CHECK-INST: luti2 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c5000 + +luti2 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001101-01010001-01010100 +// CHECK-INST: luti2 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d5154 + +luti2 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001100-11010001-10110110 +// CHECK-INST: luti2 { z22.h, z23.h }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xd1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cd1b6 + +luti2 {z30.h - z31.h}, zt0, z31[7] // 11000000-10001111-11010011-11111110 +// CHECK-INST: luti2 { z30.h, z31.h }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xd3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fd3fe + + +luti2 {z0.s - z1.s}, zt0, z0[0] // 
11000000-10001100-01100000-00000000 +// CHECK-INST: luti2 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c6000 + +luti2 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001101-01100001-01010100 +// CHECK-INST: luti2 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d6154 + +luti2 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001100-11100001-10110110 +// CHECK-INST: luti2 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ce1b6 + +luti2 {z30.s - z31.s}, zt0, z31[7] // 11000000-10001111-11100011-11111110 +// CHECK-INST: luti2 { z30.s, z31.s }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xe3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fe3fe + + +luti2 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001100-01000000-00000000 +// CHECK-INST: luti2 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c4000 + +luti2 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001101-01000001-01010100 +// CHECK-INST: luti2 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d4154 + +luti2 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001100-11000001-10110110 +// CHECK-INST: luti2 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08cc1b6 + +luti2 {z30.b - z31.b}, zt0, z31[7] // 11000000-10001111-11000011-11111110 +// CHECK-INST: luti2 { z30.b, z31.b }, zt0, z31[7] +// CHECK-ENCODING: [0xfe,0xc3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fc3fe + + +luti2 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001100-10010000-00000000 +// CHECK-INST: luti2 { 
z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c9000 + +luti2 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001101-10010001-01010100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d9154 + +luti2 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001100-10010001-10110100 +// CHECK-INST: luti2 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c91b4 + +luti2 {z28.h - z31.h}, zt0, z31[3] // 11000000-10001111-10010011-11111100 +// CHECK-INST: luti2 { z28.h - z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x93,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f93fc + + +luti2 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001100-10100000-00000000 +// CHECK-INST: luti2 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca000 + +luti2 {z20.s - z23.s}, zt0, z10[1] // 11000000-10001101-10100001-01010100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08da154 + +luti2 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001100-10100001-10110100 +// CHECK-INST: luti2 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ca1b4 + +luti2 {z28.s - z31.s}, zt0, z31[3] // 11000000-10001111-10100011-11111100 +// CHECK-INST: luti2 { z28.s - z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0xa3,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08fa3fc + + +luti2 {z0.b - z3.b}, zt0, z0[0] // 11000000-10001100-10000000-00000000 +// CHECK-INST: luti2 { z0.b - z3.b }, zt0, z0[0] +// CHECK-ENCODING: 
[0x00,0x80,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c8000 + +luti2 {z20.b - z23.b}, zt0, z10[1] // 11000000-10001101-10000001-01010100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x81,0x8d,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08d8154 + +luti2 {z20.b - z23.b}, zt0, z13[0] // 11000000-10001100-10000001-10110100 +// CHECK-INST: luti2 { z20.b - z23.b }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x81,0x8c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08c81b4 + +luti2 {z28.b - z31.b}, zt0, z31[3] // 11000000-10001111-10000011-11111100 +// CHECK-INST: luti2 { z28.b - z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfc,0x83,0x8f,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08f83fc + diff --git a/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid lane indices + +luti4 z0.h, zt0, z0[8] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.h, zt0, z0[8] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 z0.s, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 7]. +// CHECK-NEXT: luti4 z0.s, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: luti4 {z0.b-z1.b}, zt0, z0[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: luti4 {z0.h-z1.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: luti4 {z0.s-z3.s}, zt0, z0[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 1]. +// CHECK-NEXT: luti4 {z0.h-z3.h}, zt0, z0[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lists + +luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.h-z2.h}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z2.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: luti4 {z1.s-z4.s}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector suffix + +luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: luti4 {z0.d-z1.d}, zt0, z0[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/luti4.s b/llvm/test/MC/AArch64/SME2/luti4.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/luti4.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +luti4 z0.h, zt0, z0[0] // 11000000-11001010-00010000-00000000 +// CHECK-INST: luti4 z0.h, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x10,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca1000 + +luti4 z21.h, zt0, z10[5] // 11000000-11001011-01010001-01010101 +// CHECK-INST: luti4 z21.h, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x51,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb5155 + +luti4 z23.h, zt0, z13[3] // 11000000-11001010-11010001-10110111 +// CHECK-INST: luti4 z23.h, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xd1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cad1b7 + +luti4 z31.h, zt0, z31[7] // 11000000-11001011-11010011-11111111 +// CHECK-INST: luti4 z31.h, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xd3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbd3ff + + +luti4 z0.s, zt0, z0[0] // 11000000-11001010-00100000-00000000 +// CHECK-INST: luti4 z0.s, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x20,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca2000 + +luti4 z21.s, zt0, z10[5] // 11000000-11001011-01100001-01010101 +// CHECK-INST: luti4 z21.s, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x61,0xcb,0xc0] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c0cb6155 + +luti4 z23.s, zt0, z13[3] // 11000000-11001010-11100001-10110111 +// CHECK-INST: luti4 z23.s, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xe1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cae1b7 + +luti4 z31.s, zt0, z31[7] // 11000000-11001011-11100011-11111111 +// CHECK-INST: luti4 z31.s, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xe3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbe3ff + + +luti4 z0.b, zt0, z0[0] // 11000000-11001010-00000000-00000000 +// CHECK-INST: luti4 z0.b, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x00,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0ca0000 + +luti4 z21.b, zt0, z10[5] // 11000000-11001011-01000001-01010101 +// CHECK-INST: luti4 z21.b, zt0, z10[5] +// CHECK-ENCODING: [0x55,0x41,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cb4155 + +luti4 z23.b, zt0, z13[3] // 11000000-11001010-11000001-10110111 +// CHECK-INST: luti4 z23.b, zt0, z13[3] +// CHECK-ENCODING: [0xb7,0xc1,0xca,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cac1b7 + +luti4 z31.b, zt0, z31[7] // 11000000-11001011-11000011-11111111 +// CHECK-INST: luti4 z31.b, zt0, z31[7] +// CHECK-ENCODING: [0xff,0xc3,0xcb,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0cbc3ff + + +luti4 {z0.h - z1.h}, zt0, z0[0] // 11000000-10001010-01010000-00000000 +// CHECK-INST: luti4 { z0.h, z1.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x50,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a5000 + +luti4 {z20.h - z21.h}, zt0, z10[2] // 11000000-10001011-01010001-01010100 +// CHECK-INST: luti4 { z20.h, z21.h }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x51,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b5154 + +luti4 {z22.h - z23.h}, zt0, z13[1] // 11000000-10001010-11010001-10110110 +// CHECK-INST: luti4 { z22.h, z23.h }, zt0, z13[1] +// 
CHECK-ENCODING: [0xb6,0xd1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ad1b6 + +luti4 {z30.h - z31.h}, zt0, z31[3] // 11000000-10001011-11010011-11111110 +// CHECK-INST: luti4 { z30.h, z31.h }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xd3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bd3fe + + +luti4 {z0.s - z1.s}, zt0, z0[0] // 11000000-10001010-01100000-00000000 +// CHECK-INST: luti4 { z0.s, z1.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x60,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a6000 + +luti4 {z20.s - z21.s}, zt0, z10[2] // 11000000-10001011-01100001-01010100 +// CHECK-INST: luti4 { z20.s, z21.s }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x61,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b6154 + +luti4 {z22.s - z23.s}, zt0, z13[1] // 11000000-10001010-11100001-10110110 +// CHECK-INST: luti4 { z22.s, z23.s }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xe1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ae1b6 + +luti4 {z30.s - z31.s}, zt0, z31[3] // 11000000-10001011-11100011-11111110 +// CHECK-INST: luti4 { z30.s, z31.s }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xe3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08be3fe + + +luti4 {z0.b - z1.b}, zt0, z0[0] // 11000000-10001010-01000000-00000000 +// CHECK-INST: luti4 { z0.b, z1.b }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x40,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a4000 + +luti4 {z20.b - z21.b}, zt0, z10[2] // 11000000-10001011-01000001-01010100 +// CHECK-INST: luti4 { z20.b, z21.b }, zt0, z10[2] +// CHECK-ENCODING: [0x54,0x41,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b4154 + +luti4 {z22.b - z23.b}, zt0, z13[1] // 11000000-10001010-11000001-10110110 +// CHECK-INST: luti4 { z22.b, z23.b }, zt0, z13[1] +// CHECK-ENCODING: [0xb6,0xc1,0x8a,0xc0] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c08ac1b6 + +luti4 {z30.b - z31.b}, zt0, z31[3] // 11000000-10001011-11000011-11111110 +// CHECK-INST: luti4 { z30.b, z31.b }, zt0, z31[3] +// CHECK-ENCODING: [0xfe,0xc3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08bc3fe + + +luti4 {z0.h - z3.h}, zt0, z0[0] // 11000000-10001010-10010000-00000000 +// CHECK-INST: luti4 { z0.h - z3.h }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0x90,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a9000 + +luti4 {z20.h - z23.h}, zt0, z10[1] // 11000000-10001011-10010001-01010100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0x91,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b9154 + +luti4 {z20.h - z23.h}, zt0, z13[0] // 11000000-10001010-10010001-10110100 +// CHECK-INST: luti4 { z20.h - z23.h }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0x91,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08a91b4 + +luti4 {z28.h - z31.h}, zt0, z31[1] // 11000000-10001011-10010011-11111100 +// CHECK-INST: luti4 { z28.h - z31.h }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0x93,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08b93fc + + +luti4 {z0.s - z3.s}, zt0, z0[0] // 11000000-10001010-10100000-00000000 +// CHECK-INST: luti4 { z0.s - z3.s }, zt0, z0[0] +// CHECK-ENCODING: [0x00,0xa0,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08aa000 + +luti4 {z20.s - z23.s}, zt0, z10[1] // 11000000-10001011-10100001-01010100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z10[1] +// CHECK-ENCODING: [0x54,0xa1,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba154 + +luti4 {z20.s - z23.s}, zt0, z13[0] // 11000000-10001010-10100001-10110100 +// CHECK-INST: luti4 { z20.s - z23.s }, zt0, z13[0] +// CHECK-ENCODING: [0xb4,0xa1,0x8a,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08aa1b4 + 
+luti4 {z28.s - z31.s}, zt0, z31[1] // 11000000-10001011-10100011-11111100 +// CHECK-INST: luti4 { z28.s - z31.s }, zt0, z31[1] +// CHECK-ENCODING: [0xfc,0xa3,0x8b,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08ba3fc + diff --git a/llvm/test/MC/AArch64/SME2/mova-diagnostics.s b/llvm/test/MC/AArch64/SME2/mova-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/mova-diagnostics.s @@ -0,0 +1,73 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Multi-vector sequence constraints + +mova {z1.d-z2.d}, za.d[w12] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: mova {z1.d-z2.d}, za.d[w12] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z1.d-z4.d}, za.d[w12] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: mova {z1.d-z4.d}, za.d[w12] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid index offset + +mova {z0.s, z1.s}, za0h.s[w12, 1:2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 2 in the range [0, 2], and the second immediate is immf + 1. +// CHECK-NEXT: mova {z0.s, z1.s}, za0h.s[w12, 1:2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z0.s, z1.s}, za0h.s[w12, 3:4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 2 in the range [0, 2], and the second immediate is immf + 1.
+// CHECK-NEXT: mova {z0.s, z1.s}, za0h.s[w12, 3:4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z0.s, z1.s}, za0h.s[w12, 0:2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: mova {z0.s, z1.s}, za0h.s[w12, 0:2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z20.d-z21.d}, za2h.d[w14, 0:3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: mova {z20.d-z21.d}, za2h.d[w14, 0:3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z16.s-z19.s}, za1h.s[w14, 0:1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: mova {z16.s-z19.s}, za1h.s[w14, 0:1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid index (expected range) + +mova {z0.b-z3.b}, za0h.b[w13, 0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: mova {z0.b-z3.b}, za0h.b[w13, 0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Operands are not consistent + +mova za.h[w8, 0], {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: mova za.h[w8, 0], {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mov za.h[w8, 0], {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT:mov za.h[w8, 0], {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mova {z0.s-z3.s}, za.b[w8, 0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: mova {z0.s-z3.s}, za.b[w8, 0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +mov {z0.h-z3.h}, za.d[w8, 0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .h +// CHECK-NEXT: mov {z0.h-z3.h}, za.d[w8, 0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/mova.s 
b/llvm/test/MC/AArch64/SME2/mova.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/mova.s @@ -0,0 +1,5521 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +mova {z0.h, z1.h}, za0h.h[w12, 0:1] // 11000000-01000110-00000000-00000000 +// CHECK-INST: mov { z0.h, z1.h }, za0h.h[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460000 + +mova {z20.h, z21.h}, za0h.h[w14, 4:5] // 11000000-01000110-01000000-01010100 +// CHECK-INST: mov { z20.h, z21.h }, za0h.h[w14, 4:5] +// CHECK-ENCODING: [0x54,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464054 + +mova {z22.h, z23.h}, za1h.h[w15, 2:3] // 11000000-01000110-01100000-10110110 +// CHECK-INST: mov { z22.h, z23.h }, za1h.h[w15, 2:3] +// CHECK-ENCODING: [0xb6,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04660b6 + +mova {z30.h, z31.h}, za1h.h[w15, 6:7] // 11000000-01000110-01100000-11111110 +// CHECK-INST: mov { z30.h, z31.h }, za1h.h[w15, 6:7] +// CHECK-ENCODING: [0xfe,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04660fe + +mova {z4.h, z5.h}, za0h.h[w12, 2:3] // 
11000000-01000110-00000000-00100100 +// CHECK-INST: mov { z4.h, z5.h }, za0h.h[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460024 + +mova {z0.h, z1.h}, za0h.h[w12, 2:3] // 11000000-01000110-00000000-00100000 +// CHECK-INST: mov { z0.h, z1.h }, za0h.h[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460020 + +mova {z24.h, z25.h}, za0h.h[w14, 6:7] // 11000000-01000110-01000000-01111000 +// CHECK-INST: mov { z24.h, z25.h }, za0h.h[w14, 6:7] +// CHECK-ENCODING: [0x78,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464078 + +mova {z0.h, z1.h}, za1h.h[w12, 0:1] // 11000000-01000110-00000000-10000000 +// CHECK-INST: mov { z0.h, z1.h }, za1h.h[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460080 + +mova {z16.h, z17.h}, za0h.h[w14, 2:3] // 11000000-01000110-01000000-00110000 +// CHECK-INST: mov { z16.h, z17.h }, za0h.h[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464030 + +mova {z28.h, z29.h}, za1h.h[w12, 4:5] // 11000000-01000110-00000000-11011100 +// CHECK-INST: mov { z28.h, z29.h }, za1h.h[w12, 4:5] +// CHECK-ENCODING: [0xdc,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04600dc + +mova {z2.h, z3.h}, za0h.h[w15, 2:3] // 11000000-01000110-01100000-00100010 +// CHECK-INST: mov { z2.h, z3.h }, za0h.h[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466022 + +mova {z6.h, z7.h}, za1h.h[w13, 0:1] // 11000000-01000110-00100000-10000110 +// CHECK-INST: mov { z6.h, z7.h }, za1h.h[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0462086 + +// Aliases + +mov {z0.h, z1.h}, za0h.h[w12, 0:1] // 
11000000-01000110-00000000-00000000 +// CHECK-INST: mov { z0.h, z1.h }, za0h.h[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460000 + +mov {z20.h, z21.h}, za0h.h[w14, 4:5] // 11000000-01000110-01000000-01010100 +// CHECK-INST: mov { z20.h, z21.h }, za0h.h[w14, 4:5] +// CHECK-ENCODING: [0x54,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464054 + +mov {z22.h, z23.h}, za1h.h[w15, 2:3] // 11000000-01000110-01100000-10110110 +// CHECK-INST: mov { z22.h, z23.h }, za1h.h[w15, 2:3] +// CHECK-ENCODING: [0xb6,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04660b6 + +mov {z30.h, z31.h}, za1h.h[w15, 6:7] // 11000000-01000110-01100000-11111110 +// CHECK-INST: mov { z30.h, z31.h }, za1h.h[w15, 6:7] +// CHECK-ENCODING: [0xfe,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04660fe + +mov {z4.h, z5.h}, za0h.h[w12, 2:3] // 11000000-01000110-00000000-00100100 +// CHECK-INST: mov { z4.h, z5.h }, za0h.h[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460024 + +mov {z0.h, z1.h}, za0h.h[w12, 2:3] // 11000000-01000110-00000000-00100000 +// CHECK-INST: mov { z0.h, z1.h }, za0h.h[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460020 + +mov {z24.h, z25.h}, za0h.h[w14, 6:7] // 11000000-01000110-01000000-01111000 +// CHECK-INST: mov { z24.h, z25.h }, za0h.h[w14, 6:7] +// CHECK-ENCODING: [0x78,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464078 + +mov {z0.h, z1.h}, za1h.h[w12, 0:1] // 11000000-01000110-00000000-10000000 +// CHECK-INST: mov { z0.h, z1.h }, za1h.h[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460080 + +mov {z16.h, z17.h}, za0h.h[w14, 2:3] // 
11000000-01000110-01000000-00110000 +// CHECK-INST: mov { z16.h, z17.h }, za0h.h[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464030 + +mov {z28.h, z29.h}, za1h.h[w12, 4:5] // 11000000-01000110-00000000-11011100 +// CHECK-INST: mov { z28.h, z29.h }, za1h.h[w12, 4:5] +// CHECK-ENCODING: [0xdc,0x00,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04600dc + +mov {z2.h, z3.h}, za0h.h[w15, 2:3] // 11000000-01000110-01100000-00100010 +// CHECK-INST: mov { z2.h, z3.h }, za0h.h[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466022 + +mov {z6.h, z7.h}, za1h.h[w13, 0:1] // 11000000-01000110-00100000-10000110 +// CHECK-INST: mov { z6.h, z7.h }, za1h.h[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0462086 + + +mova {z0.h, z1.h}, za0v.h[w12, 0:1] // 11000000-01000110-10000000-00000000 +// CHECK-INST: mov { z0.h, z1.h }, za0v.h[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468000 + +mova {z20.h, z21.h}, za0v.h[w14, 4:5] // 11000000-01000110-11000000-01010100 +// CHECK-INST: mov { z20.h, z21.h }, za0v.h[w14, 4:5] +// CHECK-ENCODING: [0x54,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c054 + +mova {z22.h, z23.h}, za1v.h[w15, 2:3] // 11000000-01000110-11100000-10110110 +// CHECK-INST: mov { z22.h, z23.h }, za1v.h[w15, 2:3] +// CHECK-ENCODING: [0xb6,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e0b6 + +mova {z30.h, z31.h}, za1v.h[w15, 6:7] // 11000000-01000110-11100000-11111110 +// CHECK-INST: mov { z30.h, z31.h }, za1v.h[w15, 6:7] +// CHECK-ENCODING: [0xfe,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e0fe + +mova {z4.h, z5.h}, za0v.h[w12, 2:3] // 
11000000-01000110-10000000-00100100 +// CHECK-INST: mov { z4.h, z5.h }, za0v.h[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468024 + +mova {z0.h, z1.h}, za0v.h[w12, 2:3] // 11000000-01000110-10000000-00100000 +// CHECK-INST: mov { z0.h, z1.h }, za0v.h[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468020 + +mova {z24.h, z25.h}, za0v.h[w14, 6:7] // 11000000-01000110-11000000-01111000 +// CHECK-INST: mov { z24.h, z25.h }, za0v.h[w14, 6:7] +// CHECK-ENCODING: [0x78,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c078 + +mova {z0.h, z1.h}, za1v.h[w12, 0:1] // 11000000-01000110-10000000-10000000 +// CHECK-INST: mov { z0.h, z1.h }, za1v.h[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468080 + +mova {z16.h, z17.h}, za0v.h[w14, 2:3] // 11000000-01000110-11000000-00110000 +// CHECK-INST: mov { z16.h, z17.h }, za0v.h[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c030 + +mova {z28.h, z29.h}, za1v.h[w12, 4:5] // 11000000-01000110-10000000-11011100 +// CHECK-INST: mov { z28.h, z29.h }, za1v.h[w12, 4:5] +// CHECK-ENCODING: [0xdc,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04680dc + +mova {z2.h, z3.h}, za0v.h[w15, 2:3] // 11000000-01000110-11100000-00100010 +// CHECK-INST: mov { z2.h, z3.h }, za0v.h[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e022 + +mova {z6.h, z7.h}, za1v.h[w13, 0:1] // 11000000-01000110-10100000-10000110 +// CHECK-INST: mov { z6.h, z7.h }, za1v.h[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046a086 + +// Aliases + +mov {z0.h, z1.h}, za0v.h[w12, 0:1] // 
11000000-01000110-10000000-00000000 +// CHECK-INST: mov { z0.h, z1.h }, za0v.h[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468000 + +mov {z20.h, z21.h}, za0v.h[w14, 4:5] // 11000000-01000110-11000000-01010100 +// CHECK-INST: mov { z20.h, z21.h }, za0v.h[w14, 4:5] +// CHECK-ENCODING: [0x54,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c054 + +mov {z22.h, z23.h}, za1v.h[w15, 2:3] // 11000000-01000110-11100000-10110110 +// CHECK-INST: mov { z22.h, z23.h }, za1v.h[w15, 2:3] +// CHECK-ENCODING: [0xb6,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e0b6 + +mov {z30.h, z31.h}, za1v.h[w15, 6:7] // 11000000-01000110-11100000-11111110 +// CHECK-INST: mov { z30.h, z31.h }, za1v.h[w15, 6:7] +// CHECK-ENCODING: [0xfe,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e0fe + +mov {z4.h, z5.h}, za0v.h[w12, 2:3] // 11000000-01000110-10000000-00100100 +// CHECK-INST: mov { z4.h, z5.h }, za0v.h[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468024 + +mov {z0.h, z1.h}, za0v.h[w12, 2:3] // 11000000-01000110-10000000-00100000 +// CHECK-INST: mov { z0.h, z1.h }, za0v.h[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468020 + +mov {z24.h, z25.h}, za0v.h[w14, 6:7] // 11000000-01000110-11000000-01111000 +// CHECK-INST: mov { z24.h, z25.h }, za0v.h[w14, 6:7] +// CHECK-ENCODING: [0x78,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c078 + +mov {z0.h, z1.h}, za1v.h[w12, 0:1] // 11000000-01000110-10000000-10000000 +// CHECK-INST: mov { z0.h, z1.h }, za1v.h[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468080 + +mov {z16.h, z17.h}, za0v.h[w14, 2:3] // 
11000000-01000110-11000000-00110000 +// CHECK-INST: mov { z16.h, z17.h }, za0v.h[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c030 + +mov {z28.h, z29.h}, za1v.h[w12, 4:5] // 11000000-01000110-10000000-11011100 +// CHECK-INST: mov { z28.h, z29.h }, za1v.h[w12, 4:5] +// CHECK-ENCODING: [0xdc,0x80,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04680dc + +mov {z2.h, z3.h}, za0v.h[w15, 2:3] // 11000000-01000110-11100000-00100010 +// CHECK-INST: mov { z2.h, z3.h }, za0v.h[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e022 + +mov {z6.h, z7.h}, za1v.h[w13, 0:1] // 11000000-01000110-10100000-10000110 +// CHECK-INST: mov { z6.h, z7.h }, za1v.h[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046a086 + + +mova za0h.h[w12, 0:1], {z0.h, z1.h} // 11000000-01000100-00000000-00000000 +// CHECK-INST: mov za0h.h[w12, 0:1], { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440000 + +mova za1h.h[w14, 2:3], {z10.h, z11.h} // 11000000-01000100-01000001-01000101 +// CHECK-INST: mov za1h.h[w14, 2:3], { z10.h, z11.h } +// CHECK-ENCODING: [0x45,0x41,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444145 + +mova za1h.h[w15, 6:7], {z12.h, z13.h} // 11000000-01000100-01100001-10000111 +// CHECK-INST: mov za1h.h[w15, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0x61,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446187 + +mova za1h.h[w15, 6:7], {z30.h, z31.h} // 11000000-01000100-01100011-11000111 +// CHECK-INST: mov za1h.h[w15, 6:7], { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x63,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04463c7 + +mova za1h.h[w12, 2:3], {z16.h, z17.h} // 
11000000-01000100-00000010-00000101 +// CHECK-INST: mov za1h.h[w12, 2:3], { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x02,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440205 + +mova za0h.h[w12, 2:3], {z0.h, z1.h} // 11000000-01000100-00000000-00000001 +// CHECK-INST: mov za0h.h[w12, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x00,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440001 + +mova za0h.h[w14, 0:1], {z18.h, z19.h} // 11000000-01000100-01000010-01000000 +// CHECK-INST: mov za0h.h[w14, 0:1], { z18.h, z19.h } +// CHECK-ENCODING: [0x40,0x42,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444240 + +mova za0h.h[w12, 0:1], {z12.h, z13.h} // 11000000-01000100-00000001-10000000 +// CHECK-INST: mov za0h.h[w12, 0:1], { z12.h, z13.h } +// CHECK-ENCODING: [0x80,0x01,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440180 + +mova za0h.h[w14, 2:3], {z0.h, z1.h} // 11000000-01000100-01000000-00000001 +// CHECK-INST: mov za0h.h[w14, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x40,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444001 + +mova za1h.h[w12, 2:3], {z22.h, z23.h} // 11000000-01000100-00000010-11000101 +// CHECK-INST: mov za1h.h[w12, 2:3], { z22.h, z23.h } +// CHECK-ENCODING: [0xc5,0x02,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04402c5 + +mova za0h.h[w15, 4:5], {z8.h, z9.h} // 11000000-01000100-01100001-00000010 +// CHECK-INST: mov za0h.h[w15, 4:5], { z8.h, z9.h } +// CHECK-ENCODING: [0x02,0x61,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446102 + +mova za1h.h[w13, 6:7], {z12.h, z13.h} // 11000000-01000100-00100001-10000111 +// CHECK-INST: mov za1h.h[w13, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0x21,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0442187 + +// Aliases + +mov za0h.h[w12, 0:1], {z0.h, z1.h} // 
11000000-01000100-00000000-00000000 +// CHECK-INST: mov za0h.h[w12, 0:1], { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440000 + +mov za1h.h[w14, 2:3], {z10.h, z11.h} // 11000000-01000100-01000001-01000101 +// CHECK-INST: mov za1h.h[w14, 2:3], { z10.h, z11.h } +// CHECK-ENCODING: [0x45,0x41,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444145 + +mov za1h.h[w15, 6:7], {z12.h, z13.h} // 11000000-01000100-01100001-10000111 +// CHECK-INST: mov za1h.h[w15, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0x61,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446187 + +mov za1h.h[w15, 6:7], {z30.h, z31.h} // 11000000-01000100-01100011-11000111 +// CHECK-INST: mov za1h.h[w15, 6:7], { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x63,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04463c7 + +mov za1h.h[w12, 2:3], {z16.h, z17.h} // 11000000-01000100-00000010-00000101 +// CHECK-INST: mov za1h.h[w12, 2:3], { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x02,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440205 + +mov za0h.h[w12, 2:3], {z0.h, z1.h} // 11000000-01000100-00000000-00000001 +// CHECK-INST: mov za0h.h[w12, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x00,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440001 + +mov za0h.h[w14, 0:1], {z18.h, z19.h} // 11000000-01000100-01000010-01000000 +// CHECK-INST: mov za0h.h[w14, 0:1], { z18.h, z19.h } +// CHECK-ENCODING: [0x40,0x42,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444240 + +mov za0h.h[w12, 0:1], {z12.h, z13.h} // 11000000-01000100-00000001-10000000 +// CHECK-INST: mov za0h.h[w12, 0:1], { z12.h, z13.h } +// CHECK-ENCODING: [0x80,0x01,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440180 + +mov za0h.h[w14, 2:3], {z0.h, z1.h} // 
11000000-01000100-01000000-00000001 +// CHECK-INST: mov za0h.h[w14, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x40,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444001 + +mov za1h.h[w12, 2:3], {z22.h, z23.h} // 11000000-01000100-00000010-11000101 +// CHECK-INST: mov za1h.h[w12, 2:3], { z22.h, z23.h } +// CHECK-ENCODING: [0xc5,0x02,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04402c5 + +mov za0h.h[w15, 4:5], {z8.h, z9.h} // 11000000-01000100-01100001-00000010 +// CHECK-INST: mov za0h.h[w15, 4:5], { z8.h, z9.h } +// CHECK-ENCODING: [0x02,0x61,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446102 + +mov za1h.h[w13, 6:7], {z12.h, z13.h} // 11000000-01000100-00100001-10000111 +// CHECK-INST: mov za1h.h[w13, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0x21,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0442187 + + +mova za0v.h[w12, 0:1], {z0.h, z1.h} // 11000000-01000100-10000000-00000000 +// CHECK-INST: mov za0v.h[w12, 0:1], { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x80,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448000 + +mova za1v.h[w14, 2:3], {z10.h, z11.h} // 11000000-01000100-11000001-01000101 +// CHECK-INST: mov za1v.h[w14, 2:3], { z10.h, z11.h } +// CHECK-ENCODING: [0x45,0xc1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c145 + +mova za1v.h[w15, 6:7], {z12.h, z13.h} // 11000000-01000100-11100001-10000111 +// CHECK-INST: mov za1v.h[w15, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0xe1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e187 + +mova za1v.h[w15, 6:7], {z30.h, z31.h} // 11000000-01000100-11100011-11000111 +// CHECK-INST: mov za1v.h[w15, 6:7], { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0xe3,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e3c7 + +mova za1v.h[w12, 2:3], {z16.h, z17.h} // 
11000000-01000100-10000010-00000101 +// CHECK-INST: mov za1v.h[w12, 2:3], { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x82,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448205 + +mova za0v.h[w12, 2:3], {z0.h, z1.h} // 11000000-01000100-10000000-00000001 +// CHECK-INST: mov za0v.h[w12, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x80,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448001 + +mova za0v.h[w14, 0:1], {z18.h, z19.h} // 11000000-01000100-11000010-01000000 +// CHECK-INST: mov za0v.h[w14, 0:1], { z18.h, z19.h } +// CHECK-ENCODING: [0x40,0xc2,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c240 + +mova za0v.h[w12, 0:1], {z12.h, z13.h} // 11000000-01000100-10000001-10000000 +// CHECK-INST: mov za0v.h[w12, 0:1], { z12.h, z13.h } +// CHECK-ENCODING: [0x80,0x81,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448180 + +mova za0v.h[w14, 2:3], {z0.h, z1.h} // 11000000-01000100-11000000-00000001 +// CHECK-INST: mov za0v.h[w14, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xc0,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c001 + +mova za1v.h[w12, 2:3], {z22.h, z23.h} // 11000000-01000100-10000010-11000101 +// CHECK-INST: mov za1v.h[w12, 2:3], { z22.h, z23.h } +// CHECK-ENCODING: [0xc5,0x82,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04482c5 + +mova za0v.h[w15, 4:5], {z8.h, z9.h} // 11000000-01000100-11100001-00000010 +// CHECK-INST: mov za0v.h[w15, 4:5], { z8.h, z9.h } +// CHECK-ENCODING: [0x02,0xe1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e102 + +mova za1v.h[w13, 6:7], {z12.h, z13.h} // 11000000-01000100-10100001-10000111 +// CHECK-INST: mov za1v.h[w13, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0xa1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044a187 + +// Aliases + +mov za0v.h[w12, 0:1], {z0.h, z1.h} // 
11000000-01000100-10000000-00000000 +// CHECK-INST: mov za0v.h[w12, 0:1], { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x80,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448000 + +mov za1v.h[w14, 2:3], {z10.h, z11.h} // 11000000-01000100-11000001-01000101 +// CHECK-INST: mov za1v.h[w14, 2:3], { z10.h, z11.h } +// CHECK-ENCODING: [0x45,0xc1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c145 + +mov za1v.h[w15, 6:7], {z12.h, z13.h} // 11000000-01000100-11100001-10000111 +// CHECK-INST: mov za1v.h[w15, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0xe1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e187 + +mov za1v.h[w15, 6:7], {z30.h, z31.h} // 11000000-01000100-11100011-11000111 +// CHECK-INST: mov za1v.h[w15, 6:7], { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0xe3,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e3c7 + +mov za1v.h[w12, 2:3], {z16.h, z17.h} // 11000000-01000100-10000010-00000101 +// CHECK-INST: mov za1v.h[w12, 2:3], { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x82,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448205 + +mov za0v.h[w12, 2:3], {z0.h, z1.h} // 11000000-01000100-10000000-00000001 +// CHECK-INST: mov za0v.h[w12, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0x80,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448001 + +mov za0v.h[w14, 0:1], {z18.h, z19.h} // 11000000-01000100-11000010-01000000 +// CHECK-INST: mov za0v.h[w14, 0:1], { z18.h, z19.h } +// CHECK-ENCODING: [0x40,0xc2,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c240 + +mov za0v.h[w12, 0:1], {z12.h, z13.h} // 11000000-01000100-10000001-10000000 +// CHECK-INST: mov za0v.h[w12, 0:1], { z12.h, z13.h } +// CHECK-ENCODING: [0x80,0x81,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448180 + +mov za0v.h[w14, 2:3], {z0.h, z1.h} // 
11000000-01000100-11000000-00000001 +// CHECK-INST: mov za0v.h[w14, 2:3], { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xc0,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c001 + +mov za1v.h[w12, 2:3], {z22.h, z23.h} // 11000000-01000100-10000010-11000101 +// CHECK-INST: mov za1v.h[w12, 2:3], { z22.h, z23.h } +// CHECK-ENCODING: [0xc5,0x82,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04482c5 + +mov za0v.h[w15, 4:5], {z8.h, z9.h} // 11000000-01000100-11100001-00000010 +// CHECK-INST: mov za0v.h[w15, 4:5], { z8.h, z9.h } +// CHECK-ENCODING: [0x02,0xe1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e102 + +mov za1v.h[w13, 6:7], {z12.h, z13.h} // 11000000-01000100-10100001-10000111 +// CHECK-INST: mov za1v.h[w13, 6:7], { z12.h, z13.h } +// CHECK-ENCODING: [0x87,0xa1,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044a187 + + +mova {z0.s, z1.s}, za0h.s[w12, 0:1] // 11000000-10000110-00000000-00000000 +// CHECK-INST: mov { z0.s, z1.s }, za0h.s[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860000 + +mova {z20.s, z21.s}, za1h.s[w14, 0:1] // 11000000-10000110-01000000-01010100 +// CHECK-INST: mov { z20.s, z21.s }, za1h.s[w14, 0:1] +// CHECK-ENCODING: [0x54,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864054 + +mova {z22.s, z23.s}, za2h.s[w15, 2:3] // 11000000-10000110-01100000-10110110 +// CHECK-INST: mov { z22.s, z23.s }, za2h.s[w15, 2:3] +// CHECK-ENCODING: [0xb6,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08660b6 + +mova {z30.s, z31.s}, za3h.s[w15, 2:3] // 11000000-10000110-01100000-11111110 +// CHECK-INST: mov { z30.s, z31.s }, za3h.s[w15, 2:3] +// CHECK-ENCODING: [0xfe,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08660fe + +mova {z4.s, z5.s}, za0h.s[w12, 2:3] // 
11000000-10000110-00000000-00100100 +// CHECK-INST: mov { z4.s, z5.s }, za0h.s[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860024 + +mova {z0.s, z1.s}, za0h.s[w12, 2:3] // 11000000-10000110-00000000-00100000 +// CHECK-INST: mov { z0.s, z1.s }, za0h.s[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860020 + +mova {z24.s, z25.s}, za1h.s[w14, 2:3] // 11000000-10000110-01000000-01111000 +// CHECK-INST: mov { z24.s, z25.s }, za1h.s[w14, 2:3] +// CHECK-ENCODING: [0x78,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864078 + +mova {z0.s, z1.s}, za2h.s[w12, 0:1] // 11000000-10000110-00000000-10000000 +// CHECK-INST: mov { z0.s, z1.s }, za2h.s[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860080 + +mova {z16.s, z17.s}, za0h.s[w14, 2:3] // 11000000-10000110-01000000-00110000 +// CHECK-INST: mov { z16.s, z17.s }, za0h.s[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864030 + +mova {z28.s, z29.s}, za3h.s[w12, 0:1] // 11000000-10000110-00000000-11011100 +// CHECK-INST: mov { z28.s, z29.s }, za3h.s[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08600dc + +mova {z2.s, z3.s}, za0h.s[w15, 2:3] // 11000000-10000110-01100000-00100010 +// CHECK-INST: mov { z2.s, z3.s }, za0h.s[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866022 + +mova {z6.s, z7.s}, za2h.s[w13, 0:1] // 11000000-10000110-00100000-10000110 +// CHECK-INST: mov { z6.s, z7.s }, za2h.s[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0862086 + +// Aliases + +mov {z0.s, z1.s}, za0h.s[w12, 0:1] // 
11000000-10000110-00000000-00000000 +// CHECK-INST: mov { z0.s, z1.s }, za0h.s[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860000 + +mov {z20.s, z21.s}, za1h.s[w14, 0:1] // 11000000-10000110-01000000-01010100 +// CHECK-INST: mov { z20.s, z21.s }, za1h.s[w14, 0:1] +// CHECK-ENCODING: [0x54,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864054 + +mov {z22.s, z23.s}, za2h.s[w15, 2:3] // 11000000-10000110-01100000-10110110 +// CHECK-INST: mov { z22.s, z23.s }, za2h.s[w15, 2:3] +// CHECK-ENCODING: [0xb6,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08660b6 + +mov {z30.s, z31.s}, za3h.s[w15, 2:3] // 11000000-10000110-01100000-11111110 +// CHECK-INST: mov { z30.s, z31.s }, za3h.s[w15, 2:3] +// CHECK-ENCODING: [0xfe,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08660fe + +mov {z4.s, z5.s}, za0h.s[w12, 2:3] // 11000000-10000110-00000000-00100100 +// CHECK-INST: mov { z4.s, z5.s }, za0h.s[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860024 + +mov {z0.s, z1.s}, za0h.s[w12, 2:3] // 11000000-10000110-00000000-00100000 +// CHECK-INST: mov { z0.s, z1.s }, za0h.s[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860020 + +mov {z24.s, z25.s}, za1h.s[w14, 2:3] // 11000000-10000110-01000000-01111000 +// CHECK-INST: mov { z24.s, z25.s }, za1h.s[w14, 2:3] +// CHECK-ENCODING: [0x78,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864078 + +mov {z0.s, z1.s}, za2h.s[w12, 0:1] // 11000000-10000110-00000000-10000000 +// CHECK-INST: mov { z0.s, z1.s }, za2h.s[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860080 + +mov {z16.s, z17.s}, za0h.s[w14, 2:3] // 
11000000-10000110-01000000-00110000 +// CHECK-INST: mov { z16.s, z17.s }, za0h.s[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864030 + +mov {z28.s, z29.s}, za3h.s[w12, 0:1] // 11000000-10000110-00000000-11011100 +// CHECK-INST: mov { z28.s, z29.s }, za3h.s[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x00,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08600dc + +mov {z2.s, z3.s}, za0h.s[w15, 2:3] // 11000000-10000110-01100000-00100010 +// CHECK-INST: mov { z2.s, z3.s }, za0h.s[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866022 + +mov {z6.s, z7.s}, za2h.s[w13, 0:1] // 11000000-10000110-00100000-10000110 +// CHECK-INST: mov { z6.s, z7.s }, za2h.s[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0862086 + + +mova {z0.s, z1.s}, za0v.s[w12, 0:1] // 11000000-10000110-10000000-00000000 +// CHECK-INST: mov { z0.s, z1.s }, za0v.s[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868000 + +mova {z20.s, z21.s}, za1v.s[w14, 0:1] // 11000000-10000110-11000000-01010100 +// CHECK-INST: mov { z20.s, z21.s }, za1v.s[w14, 0:1] +// CHECK-ENCODING: [0x54,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c054 + +mova {z22.s, z23.s}, za2v.s[w15, 2:3] // 11000000-10000110-11100000-10110110 +// CHECK-INST: mov { z22.s, z23.s }, za2v.s[w15, 2:3] +// CHECK-ENCODING: [0xb6,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e0b6 + +mova {z30.s, z31.s}, za3v.s[w15, 2:3] // 11000000-10000110-11100000-11111110 +// CHECK-INST: mov { z30.s, z31.s }, za3v.s[w15, 2:3] +// CHECK-ENCODING: [0xfe,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e0fe + +mova {z4.s, z5.s}, za0v.s[w12, 2:3] // 
11000000-10000110-10000000-00100100 +// CHECK-INST: mov { z4.s, z5.s }, za0v.s[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868024 + +mova {z0.s, z1.s}, za0v.s[w12, 2:3] // 11000000-10000110-10000000-00100000 +// CHECK-INST: mov { z0.s, z1.s }, za0v.s[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868020 + +mova {z24.s, z25.s}, za1v.s[w14, 2:3] // 11000000-10000110-11000000-01111000 +// CHECK-INST: mov { z24.s, z25.s }, za1v.s[w14, 2:3] +// CHECK-ENCODING: [0x78,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c078 + +mova {z0.s, z1.s}, za2v.s[w12, 0:1] // 11000000-10000110-10000000-10000000 +// CHECK-INST: mov { z0.s, z1.s }, za2v.s[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868080 + +mova {z16.s, z17.s}, za0v.s[w14, 2:3] // 11000000-10000110-11000000-00110000 +// CHECK-INST: mov { z16.s, z17.s }, za0v.s[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c030 + +mova {z28.s, z29.s}, za3v.s[w12, 0:1] // 11000000-10000110-10000000-11011100 +// CHECK-INST: mov { z28.s, z29.s }, za3v.s[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08680dc + +mova {z2.s, z3.s}, za0v.s[w15, 2:3] // 11000000-10000110-11100000-00100010 +// CHECK-INST: mov { z2.s, z3.s }, za0v.s[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e022 + +mova {z6.s, z7.s}, za2v.s[w13, 0:1] // 11000000-10000110-10100000-10000110 +// CHECK-INST: mov { z6.s, z7.s }, za2v.s[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086a086 + +// Aliases + +mov {z0.s, z1.s}, za0v.s[w12, 0:1] // 
11000000-10000110-10000000-00000000 +// CHECK-INST: mov { z0.s, z1.s }, za0v.s[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868000 + +mov {z20.s, z21.s}, za1v.s[w14, 0:1] // 11000000-10000110-11000000-01010100 +// CHECK-INST: mov { z20.s, z21.s }, za1v.s[w14, 0:1] +// CHECK-ENCODING: [0x54,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c054 + +mov {z22.s, z23.s}, za2v.s[w15, 2:3] // 11000000-10000110-11100000-10110110 +// CHECK-INST: mov { z22.s, z23.s }, za2v.s[w15, 2:3] +// CHECK-ENCODING: [0xb6,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e0b6 + +mov {z30.s, z31.s}, za3v.s[w15, 2:3] // 11000000-10000110-11100000-11111110 +// CHECK-INST: mov { z30.s, z31.s }, za3v.s[w15, 2:3] +// CHECK-ENCODING: [0xfe,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e0fe + +mov {z4.s, z5.s}, za0v.s[w12, 2:3] // 11000000-10000110-10000000-00100100 +// CHECK-INST: mov { z4.s, z5.s }, za0v.s[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868024 + +mov {z0.s, z1.s}, za0v.s[w12, 2:3] // 11000000-10000110-10000000-00100000 +// CHECK-INST: mov { z0.s, z1.s }, za0v.s[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868020 + +mov {z24.s, z25.s}, za1v.s[w14, 2:3] // 11000000-10000110-11000000-01111000 +// CHECK-INST: mov { z24.s, z25.s }, za1v.s[w14, 2:3] +// CHECK-ENCODING: [0x78,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c078 + +mov {z0.s, z1.s}, za2v.s[w12, 0:1] // 11000000-10000110-10000000-10000000 +// CHECK-INST: mov { z0.s, z1.s }, za2v.s[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868080 + +mov {z16.s, z17.s}, za0v.s[w14, 2:3] // 
11000000-10000110-11000000-00110000 +// CHECK-INST: mov { z16.s, z17.s }, za0v.s[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c030 + +mov {z28.s, z29.s}, za3v.s[w12, 0:1] // 11000000-10000110-10000000-11011100 +// CHECK-INST: mov { z28.s, z29.s }, za3v.s[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x80,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08680dc + +mov {z2.s, z3.s}, za0v.s[w15, 2:3] // 11000000-10000110-11100000-00100010 +// CHECK-INST: mov { z2.s, z3.s }, za0v.s[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e022 + +mov {z6.s, z7.s}, za2v.s[w13, 0:1] // 11000000-10000110-10100000-10000110 +// CHECK-INST: mov { z6.s, z7.s }, za2v.s[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086a086 + + +mova za0h.s[w12, 0:1], {z0.s, z1.s} // 11000000-10000100-00000000-00000000 +// CHECK-INST: mov za0h.s[w12, 0:1], { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x00,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840000 + +mova za2h.s[w14, 2:3], {z10.s, z11.s} // 11000000-10000100-01000001-01000101 +// CHECK-INST: mov za2h.s[w14, 2:3], { z10.s, z11.s } +// CHECK-ENCODING: [0x45,0x41,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844145 + +mova za3h.s[w15, 2:3], {z12.s, z13.s} // 11000000-10000100-01100001-10000111 +// CHECK-INST: mov za3h.s[w15, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0x61,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846187 + +mova za3h.s[w15, 2:3], {z30.s, z31.s} // 11000000-10000100-01100011-11000111 +// CHECK-INST: mov za3h.s[w15, 2:3], { z30.s, z31.s } +// CHECK-ENCODING: [0xc7,0x63,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08463c7 + +mova za2h.s[w12, 2:3], {z16.s, z17.s} // 
11000000-10000100-00000010-00000101 +// CHECK-INST: mov za2h.s[w12, 2:3], { z16.s, z17.s } +// CHECK-ENCODING: [0x05,0x02,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840205 + +mova za0h.s[w12, 2:3], {z0.s, z1.s} // 11000000-10000100-00000000-00000001 +// CHECK-INST: mov za0h.s[w12, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x00,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840001 + +mova za0h.s[w14, 0:1], {z18.s, z19.s} // 11000000-10000100-01000010-01000000 +// CHECK-INST: mov za0h.s[w14, 0:1], { z18.s, z19.s } +// CHECK-ENCODING: [0x40,0x42,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844240 + +mova za0h.s[w12, 0:1], {z12.s, z13.s} // 11000000-10000100-00000001-10000000 +// CHECK-INST: mov za0h.s[w12, 0:1], { z12.s, z13.s } +// CHECK-ENCODING: [0x80,0x01,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840180 + +mova za0h.s[w14, 2:3], {z0.s, z1.s} // 11000000-10000100-01000000-00000001 +// CHECK-INST: mov za0h.s[w14, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x40,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844001 + +mova za2h.s[w12, 2:3], {z22.s, z23.s} // 11000000-10000100-00000010-11000101 +// CHECK-INST: mov za2h.s[w12, 2:3], { z22.s, z23.s } +// CHECK-ENCODING: [0xc5,0x02,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08402c5 + +mova za1h.s[w15, 0:1], {z8.s, z9.s} // 11000000-10000100-01100001-00000010 +// CHECK-INST: mov za1h.s[w15, 0:1], { z8.s, z9.s } +// CHECK-ENCODING: [0x02,0x61,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846102 + +mova za3h.s[w13, 2:3], {z12.s, z13.s} // 11000000-10000100-00100001-10000111 +// CHECK-INST: mov za3h.s[w13, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0x21,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0842187 + +// Aliases + +mov za0h.s[w12, 0:1], {z0.s, z1.s} // 
11000000-10000100-00000000-00000000 +// CHECK-INST: mov za0h.s[w12, 0:1], { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x00,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840000 + +mov za2h.s[w14, 2:3], {z10.s, z11.s} // 11000000-10000100-01000001-01000101 +// CHECK-INST: mov za2h.s[w14, 2:3], { z10.s, z11.s } +// CHECK-ENCODING: [0x45,0x41,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844145 + +mov za3h.s[w15, 2:3], {z12.s, z13.s} // 11000000-10000100-01100001-10000111 +// CHECK-INST: mov za3h.s[w15, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0x61,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846187 + +mov za3h.s[w15, 2:3], {z30.s, z31.s} // 11000000-10000100-01100011-11000111 +// CHECK-INST: mov za3h.s[w15, 2:3], { z30.s, z31.s } +// CHECK-ENCODING: [0xc7,0x63,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08463c7 + +mov za2h.s[w12, 2:3], {z16.s, z17.s} // 11000000-10000100-00000010-00000101 +// CHECK-INST: mov za2h.s[w12, 2:3], { z16.s, z17.s } +// CHECK-ENCODING: [0x05,0x02,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840205 + +mov za0h.s[w12, 2:3], {z0.s, z1.s} // 11000000-10000100-00000000-00000001 +// CHECK-INST: mov za0h.s[w12, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x00,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840001 + +mov za0h.s[w14, 0:1], {z18.s, z19.s} // 11000000-10000100-01000010-01000000 +// CHECK-INST: mov za0h.s[w14, 0:1], { z18.s, z19.s } +// CHECK-ENCODING: [0x40,0x42,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844240 + +mov za0h.s[w12, 0:1], {z12.s, z13.s} // 11000000-10000100-00000001-10000000 +// CHECK-INST: mov za0h.s[w12, 0:1], { z12.s, z13.s } +// CHECK-ENCODING: [0x80,0x01,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840180 + +mov za0h.s[w14, 2:3], {z0.s, z1.s} // 
11000000-10000100-01000000-00000001 +// CHECK-INST: mov za0h.s[w14, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x40,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844001 + +mov za2h.s[w12, 2:3], {z22.s, z23.s} // 11000000-10000100-00000010-11000101 +// CHECK-INST: mov za2h.s[w12, 2:3], { z22.s, z23.s } +// CHECK-ENCODING: [0xc5,0x02,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08402c5 + +mov za1h.s[w15, 0:1], {z8.s, z9.s} // 11000000-10000100-01100001-00000010 +// CHECK-INST: mov za1h.s[w15, 0:1], { z8.s, z9.s } +// CHECK-ENCODING: [0x02,0x61,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846102 + +mov za3h.s[w13, 2:3], {z12.s, z13.s} // 11000000-10000100-00100001-10000111 +// CHECK-INST: mov za3h.s[w13, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0x21,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0842187 + + +mova za0v.s[w12, 0:1], {z0.s, z1.s} // 11000000-10000100-10000000-00000000 +// CHECK-INST: mov za0v.s[w12, 0:1], { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x80,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848000 + +mova za2v.s[w14, 2:3], {z10.s, z11.s} // 11000000-10000100-11000001-01000101 +// CHECK-INST: mov za2v.s[w14, 2:3], { z10.s, z11.s } +// CHECK-ENCODING: [0x45,0xc1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c145 + +mova za3v.s[w15, 2:3], {z12.s, z13.s} // 11000000-10000100-11100001-10000111 +// CHECK-INST: mov za3v.s[w15, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0xe1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e187 + +mova za3v.s[w15, 2:3], {z30.s, z31.s} // 11000000-10000100-11100011-11000111 +// CHECK-INST: mov za3v.s[w15, 2:3], { z30.s, z31.s } +// CHECK-ENCODING: [0xc7,0xe3,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e3c7 + +mova za2v.s[w12, 2:3], {z16.s, z17.s} // 
11000000-10000100-10000010-00000101 +// CHECK-INST: mov za2v.s[w12, 2:3], { z16.s, z17.s } +// CHECK-ENCODING: [0x05,0x82,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848205 + +mova za0v.s[w12, 2:3], {z0.s, z1.s} // 11000000-10000100-10000000-00000001 +// CHECK-INST: mov za0v.s[w12, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x80,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848001 + +mova za0v.s[w14, 0:1], {z18.s, z19.s} // 11000000-10000100-11000010-01000000 +// CHECK-INST: mov za0v.s[w14, 0:1], { z18.s, z19.s } +// CHECK-ENCODING: [0x40,0xc2,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c240 + +mova za0v.s[w12, 0:1], {z12.s, z13.s} // 11000000-10000100-10000001-10000000 +// CHECK-INST: mov za0v.s[w12, 0:1], { z12.s, z13.s } +// CHECK-ENCODING: [0x80,0x81,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848180 + +mova za0v.s[w14, 2:3], {z0.s, z1.s} // 11000000-10000100-11000000-00000001 +// CHECK-INST: mov za0v.s[w14, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xc0,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c001 + +mova za2v.s[w12, 2:3], {z22.s, z23.s} // 11000000-10000100-10000010-11000101 +// CHECK-INST: mov za2v.s[w12, 2:3], { z22.s, z23.s } +// CHECK-ENCODING: [0xc5,0x82,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08482c5 + +mova za1v.s[w15, 0:1], {z8.s, z9.s} // 11000000-10000100-11100001-00000010 +// CHECK-INST: mov za1v.s[w15, 0:1], { z8.s, z9.s } +// CHECK-ENCODING: [0x02,0xe1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e102 + +mova za3v.s[w13, 2:3], {z12.s, z13.s} // 11000000-10000100-10100001-10000111 +// CHECK-INST: mov za3v.s[w13, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0xa1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084a187 + +// Aliases + +mov za0v.s[w12, 0:1], {z0.s, z1.s} // 
11000000-10000100-10000000-00000000 +// CHECK-INST: mov za0v.s[w12, 0:1], { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x80,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848000 + +mov za2v.s[w14, 2:3], {z10.s, z11.s} // 11000000-10000100-11000001-01000101 +// CHECK-INST: mov za2v.s[w14, 2:3], { z10.s, z11.s } +// CHECK-ENCODING: [0x45,0xc1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c145 + +mov za3v.s[w15, 2:3], {z12.s, z13.s} // 11000000-10000100-11100001-10000111 +// CHECK-INST: mov za3v.s[w15, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0xe1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e187 + +mov za3v.s[w15, 2:3], {z30.s, z31.s} // 11000000-10000100-11100011-11000111 +// CHECK-INST: mov za3v.s[w15, 2:3], { z30.s, z31.s } +// CHECK-ENCODING: [0xc7,0xe3,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e3c7 + +mov za2v.s[w12, 2:3], {z16.s, z17.s} // 11000000-10000100-10000010-00000101 +// CHECK-INST: mov za2v.s[w12, 2:3], { z16.s, z17.s } +// CHECK-ENCODING: [0x05,0x82,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848205 + +mov za0v.s[w12, 2:3], {z0.s, z1.s} // 11000000-10000100-10000000-00000001 +// CHECK-INST: mov za0v.s[w12, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0x80,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848001 + +mov za0v.s[w14, 0:1], {z18.s, z19.s} // 11000000-10000100-11000010-01000000 +// CHECK-INST: mov za0v.s[w14, 0:1], { z18.s, z19.s } +// CHECK-ENCODING: [0x40,0xc2,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c240 + +mov za0v.s[w12, 0:1], {z12.s, z13.s} // 11000000-10000100-10000001-10000000 +// CHECK-INST: mov za0v.s[w12, 0:1], { z12.s, z13.s } +// CHECK-ENCODING: [0x80,0x81,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848180 + +mov za0v.s[w14, 2:3], {z0.s, z1.s} // 
11000000-10000100-11000000-00000001 +// CHECK-INST: mov za0v.s[w14, 2:3], { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xc0,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c001 + +mov za2v.s[w12, 2:3], {z22.s, z23.s} // 11000000-10000100-10000010-11000101 +// CHECK-INST: mov za2v.s[w12, 2:3], { z22.s, z23.s } +// CHECK-ENCODING: [0xc5,0x82,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c08482c5 + +mov za1v.s[w15, 0:1], {z8.s, z9.s} // 11000000-10000100-11100001-00000010 +// CHECK-INST: mov za1v.s[w15, 0:1], { z8.s, z9.s } +// CHECK-ENCODING: [0x02,0xe1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e102 + +mov za3v.s[w13, 2:3], {z12.s, z13.s} // 11000000-10000100-10100001-10000111 +// CHECK-INST: mov za3v.s[w13, 2:3], { z12.s, z13.s } +// CHECK-ENCODING: [0x87,0xa1,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084a187 + + +mova {z0.d, z1.d}, za0h.d[w12, 0:1] // 11000000-11000110-00000000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60000 + +mova {z20.d, z21.d}, za2h.d[w14, 0:1] // 11000000-11000110-01000000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za2h.d[w14, 0:1] +// CHECK-ENCODING: [0x54,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64054 + +mova {z22.d, z23.d}, za5h.d[w15, 0:1] // 11000000-11000110-01100000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za5h.d[w15, 0:1] +// CHECK-ENCODING: [0xb6,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c660b6 + +mova {z30.d, z31.d}, za7h.d[w15, 0:1] // 11000000-11000110-01100000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za7h.d[w15, 0:1] +// CHECK-ENCODING: [0xfe,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c660fe + +mova {z4.d, z5.d}, za1h.d[w12, 0:1] // 
11000000-11000110-00000000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za1h.d[w12, 0:1] +// CHECK-ENCODING: [0x24,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60024 + +mova {z0.d, z1.d}, za1h.d[w12, 0:1] // 11000000-11000110-00000000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za1h.d[w12, 0:1] +// CHECK-ENCODING: [0x20,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60020 + +mova {z24.d, z25.d}, za3h.d[w14, 0:1] // 11000000-11000110-01000000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za3h.d[w14, 0:1] +// CHECK-ENCODING: [0x78,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64078 + +mova {z0.d, z1.d}, za4h.d[w12, 0:1] // 11000000-11000110-00000000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za4h.d[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60080 + +mova {z16.d, z17.d}, za1h.d[w14, 0:1] // 11000000-11000110-01000000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za1h.d[w14, 0:1] +// CHECK-ENCODING: [0x30,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64030 + +mova {z28.d, z29.d}, za6h.d[w12, 0:1] // 11000000-11000110-00000000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za6h.d[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c600dc + +mova {z2.d, z3.d}, za1h.d[w15, 0:1] // 11000000-11000110-01100000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za1h.d[w15, 0:1] +// CHECK-ENCODING: [0x22,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c66022 + +mova {z6.d, z7.d}, za4h.d[w13, 0:1] // 11000000-11000110-00100000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za4h.d[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c62086 + +// Aliases + +mov {z0.d, z1.d}, za0h.d[w12, 0:1] // 
11000000-11000110-00000000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za0h.d[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60000 + +mov {z20.d, z21.d}, za2h.d[w14, 0:1] // 11000000-11000110-01000000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za2h.d[w14, 0:1] +// CHECK-ENCODING: [0x54,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64054 + +mov {z22.d, z23.d}, za5h.d[w15, 0:1] // 11000000-11000110-01100000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za5h.d[w15, 0:1] +// CHECK-ENCODING: [0xb6,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c660b6 + +mov {z30.d, z31.d}, za7h.d[w15, 0:1] // 11000000-11000110-01100000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za7h.d[w15, 0:1] +// CHECK-ENCODING: [0xfe,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c660fe + +mov {z4.d, z5.d}, za1h.d[w12, 0:1] // 11000000-11000110-00000000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za1h.d[w12, 0:1] +// CHECK-ENCODING: [0x24,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60024 + +mov {z0.d, z1.d}, za1h.d[w12, 0:1] // 11000000-11000110-00000000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za1h.d[w12, 0:1] +// CHECK-ENCODING: [0x20,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60020 + +mov {z24.d, z25.d}, za3h.d[w14, 0:1] // 11000000-11000110-01000000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za3h.d[w14, 0:1] +// CHECK-ENCODING: [0x78,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64078 + +mov {z0.d, z1.d}, za4h.d[w12, 0:1] // 11000000-11000110-00000000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za4h.d[w12, 0:1] +// CHECK-ENCODING: [0x80,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60080 + +mov {z16.d, z17.d}, za1h.d[w14, 0:1] // 
11000000-11000110-01000000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za1h.d[w14, 0:1] +// CHECK-ENCODING: [0x30,0x40,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64030 + +mov {z28.d, z29.d}, za6h.d[w12, 0:1] // 11000000-11000110-00000000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za6h.d[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x00,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c600dc + +mov {z2.d, z3.d}, za1h.d[w15, 0:1] // 11000000-11000110-01100000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za1h.d[w15, 0:1] +// CHECK-ENCODING: [0x22,0x60,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c66022 + +mov {z6.d, z7.d}, za4h.d[w13, 0:1] // 11000000-11000110-00100000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za4h.d[w13, 0:1] +// CHECK-ENCODING: [0x86,0x20,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c62086 + + +mova {z0.d, z1.d}, za0v.d[w12, 0:1] // 11000000-11000110-10000000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68000 + +mova {z20.d, z21.d}, za2v.d[w14, 0:1] // 11000000-11000110-11000000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za2v.d[w14, 0:1] +// CHECK-ENCODING: [0x54,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c054 + +mova {z22.d, z23.d}, za5v.d[w15, 0:1] // 11000000-11000110-11100000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za5v.d[w15, 0:1] +// CHECK-ENCODING: [0xb6,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e0b6 + +mova {z30.d, z31.d}, za7v.d[w15, 0:1] // 11000000-11000110-11100000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za7v.d[w15, 0:1] +// CHECK-ENCODING: [0xfe,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e0fe + +mova {z4.d, z5.d}, za1v.d[w12, 0:1] // 
11000000-11000110-10000000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za1v.d[w12, 0:1] +// CHECK-ENCODING: [0x24,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68024 + +mova {z0.d, z1.d}, za1v.d[w12, 0:1] // 11000000-11000110-10000000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za1v.d[w12, 0:1] +// CHECK-ENCODING: [0x20,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68020 + +mova {z24.d, z25.d}, za3v.d[w14, 0:1] // 11000000-11000110-11000000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za3v.d[w14, 0:1] +// CHECK-ENCODING: [0x78,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c078 + +mova {z0.d, z1.d}, za4v.d[w12, 0:1] // 11000000-11000110-10000000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za4v.d[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68080 + +mova {z16.d, z17.d}, za1v.d[w14, 0:1] // 11000000-11000110-11000000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za1v.d[w14, 0:1] +// CHECK-ENCODING: [0x30,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c030 + +mova {z28.d, z29.d}, za6v.d[w12, 0:1] // 11000000-11000110-10000000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za6v.d[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c680dc + +mova {z2.d, z3.d}, za1v.d[w15, 0:1] // 11000000-11000110-11100000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za1v.d[w15, 0:1] +// CHECK-ENCODING: [0x22,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e022 + +mova {z6.d, z7.d}, za4v.d[w13, 0:1] // 11000000-11000110-10100000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za4v.d[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6a086 + +// Aliases + +mov {z0.d, z1.d}, za0v.d[w12, 0:1] // 
11000000-11000110-10000000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za0v.d[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68000 + +mov {z20.d, z21.d}, za2v.d[w14, 0:1] // 11000000-11000110-11000000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za2v.d[w14, 0:1] +// CHECK-ENCODING: [0x54,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c054 + +mov {z22.d, z23.d}, za5v.d[w15, 0:1] // 11000000-11000110-11100000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za5v.d[w15, 0:1] +// CHECK-ENCODING: [0xb6,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e0b6 + +mov {z30.d, z31.d}, za7v.d[w15, 0:1] // 11000000-11000110-11100000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za7v.d[w15, 0:1] +// CHECK-ENCODING: [0xfe,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e0fe + +mov {z4.d, z5.d}, za1v.d[w12, 0:1] // 11000000-11000110-10000000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za1v.d[w12, 0:1] +// CHECK-ENCODING: [0x24,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68024 + +mov {z0.d, z1.d}, za1v.d[w12, 0:1] // 11000000-11000110-10000000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za1v.d[w12, 0:1] +// CHECK-ENCODING: [0x20,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68020 + +mov {z24.d, z25.d}, za3v.d[w14, 0:1] // 11000000-11000110-11000000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za3v.d[w14, 0:1] +// CHECK-ENCODING: [0x78,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c078 + +mov {z0.d, z1.d}, za4v.d[w12, 0:1] // 11000000-11000110-10000000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za4v.d[w12, 0:1] +// CHECK-ENCODING: [0x80,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68080 + +mov {z16.d, z17.d}, za1v.d[w14, 0:1] // 
11000000-11000110-11000000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za1v.d[w14, 0:1] +// CHECK-ENCODING: [0x30,0xc0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c030 + +mov {z28.d, z29.d}, za6v.d[w12, 0:1] // 11000000-11000110-10000000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za6v.d[w12, 0:1] +// CHECK-ENCODING: [0xdc,0x80,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c680dc + +mov {z2.d, z3.d}, za1v.d[w15, 0:1] // 11000000-11000110-11100000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za1v.d[w15, 0:1] +// CHECK-ENCODING: [0x22,0xe0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e022 + +mov {z6.d, z7.d}, za4v.d[w13, 0:1] // 11000000-11000110-10100000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za4v.d[w13, 0:1] +// CHECK-ENCODING: [0x86,0xa0,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6a086 + + +mova {z0.d, z1.d}, za.d[w8, 0, vgx2] // 11000000-00000110-00001000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +// CHECK-ENCODING: [0x00,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060800 + +mova {z0.d, z1.d}, za.d[w8, 0] // 11000000-00000110-00001000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +// CHECK-ENCODING: [0x00,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060800 + +mova {z20.d, z21.d}, za.d[w10, 2, vgx2] // 11000000-00000110-01001000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za.d[w10, 2, vgx2] +// CHECK-ENCODING: [0x54,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064854 + +mova {z20.d, z21.d}, za.d[w10, 2] // 11000000-00000110-01001000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za.d[w10, 2, vgx2] +// CHECK-ENCODING: [0x54,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064854 + +mova {z22.d, z23.d}, za.d[w11, 5, vgx2] // 
11000000-00000110-01101000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za.d[w11, 5, vgx2] +// CHECK-ENCODING: [0xb6,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668b6 + +mova {z22.d, z23.d}, za.d[w11, 5] // 11000000-00000110-01101000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za.d[w11, 5, vgx2] +// CHECK-ENCODING: [0xb6,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668b6 + +mova {z30.d, z31.d}, za.d[w11, 7, vgx2] // 11000000-00000110-01101000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za.d[w11, 7, vgx2] +// CHECK-ENCODING: [0xfe,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668fe + +mova {z30.d, z31.d}, za.d[w11, 7] // 11000000-00000110-01101000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za.d[w11, 7, vgx2] +// CHECK-ENCODING: [0xfe,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668fe + +mova {z4.d, z5.d}, za.d[w8, 1, vgx2] // 11000000-00000110-00001000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x24,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060824 + +mova {z4.d, z5.d}, za.d[w8, 1] // 11000000-00000110-00001000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x24,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060824 + +mova {z0.d, z1.d}, za.d[w8, 1, vgx2] // 11000000-00000110-00001000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x20,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060820 + +mova {z0.d, z1.d}, za.d[w8, 1] // 11000000-00000110-00001000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x20,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060820 + +mova {z24.d, z25.d}, za.d[w10, 3, vgx2] // 
11000000-00000110-01001000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za.d[w10, 3, vgx2] +// CHECK-ENCODING: [0x78,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064878 + +mova {z24.d, z25.d}, za.d[w10, 3] // 11000000-00000110-01001000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za.d[w10, 3, vgx2] +// CHECK-ENCODING: [0x78,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064878 + +mova {z0.d, z1.d}, za.d[w8, 4, vgx2] // 11000000-00000110-00001000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 4, vgx2] +// CHECK-ENCODING: [0x80,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060880 + +mova {z0.d, z1.d}, za.d[w8, 4] // 11000000-00000110-00001000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 4, vgx2] +// CHECK-ENCODING: [0x80,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060880 + +mova {z16.d, z17.d}, za.d[w10, 1, vgx2] // 11000000-00000110-01001000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za.d[w10, 1, vgx2] +// CHECK-ENCODING: [0x30,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064830 + +mova {z16.d, z17.d}, za.d[w10, 1] // 11000000-00000110-01001000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za.d[w10, 1, vgx2] +// CHECK-ENCODING: [0x30,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064830 + +mova {z28.d, z29.d}, za.d[w8, 6, vgx2] // 11000000-00000110-00001000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za.d[w8, 6, vgx2] +// CHECK-ENCODING: [0xdc,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00608dc + +mova {z28.d, z29.d}, za.d[w8, 6] // 11000000-00000110-00001000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za.d[w8, 6, vgx2] +// CHECK-ENCODING: [0xdc,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00608dc + +mova {z2.d, z3.d}, za.d[w11, 1, vgx2] // 
11000000-00000110-01101000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za.d[w11, 1, vgx2] +// CHECK-ENCODING: [0x22,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066822 + +mova {z2.d, z3.d}, za.d[w11, 1] // 11000000-00000110-01101000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za.d[w11, 1, vgx2] +// CHECK-ENCODING: [0x22,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066822 + +mova {z6.d, z7.d}, za.d[w9, 4, vgx2] // 11000000-00000110-00101000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za.d[w9, 4, vgx2] +// CHECK-ENCODING: [0x86,0x28,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062886 + +mova {z6.d, z7.d}, za.d[w9, 4] // 11000000-00000110-00101000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za.d[w9, 4, vgx2] +// CHECK-ENCODING: [0x86,0x28,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062886 + +// Aliases + +mov {z0.d, z1.d}, za.d[w8, 0, vgx2] // 11000000-00000110-00001000-00000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 0, vgx2] +// CHECK-ENCODING: [0x00,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060800 + +mov {z20.d, z21.d}, za.d[w10, 2, vgx2] // 11000000-00000110-01001000-01010100 +// CHECK-INST: mov { z20.d, z21.d }, za.d[w10, 2, vgx2] +// CHECK-ENCODING: [0x54,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064854 + +mov {z22.d, z23.d}, za.d[w11, 5, vgx2] // 11000000-00000110-01101000-10110110 +// CHECK-INST: mov { z22.d, z23.d }, za.d[w11, 5, vgx2] +// CHECK-ENCODING: [0xb6,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668b6 + +mov {z30.d, z31.d}, za.d[w11, 7, vgx2] // 11000000-00000110-01101000-11111110 +// CHECK-INST: mov { z30.d, z31.d }, za.d[w11, 7, vgx2] +// CHECK-ENCODING: [0xfe,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00668fe + +mov {z4.d, z5.d}, za.d[w8, 1, vgx2] // 
11000000-00000110-00001000-00100100 +// CHECK-INST: mov { z4.d, z5.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x24,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060824 + +mov {z0.d, z1.d}, za.d[w8, 1, vgx2] // 11000000-00000110-00001000-00100000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 1, vgx2] +// CHECK-ENCODING: [0x20,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060820 + +mov {z24.d, z25.d}, za.d[w10, 3, vgx2] // 11000000-00000110-01001000-01111000 +// CHECK-INST: mov { z24.d, z25.d }, za.d[w10, 3, vgx2] +// CHECK-ENCODING: [0x78,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064878 + +mov {z0.d, z1.d}, za.d[w8, 4, vgx2] // 11000000-00000110-00001000-10000000 +// CHECK-INST: mov { z0.d, z1.d }, za.d[w8, 4, vgx2] +// CHECK-ENCODING: [0x80,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060880 + +mov {z16.d, z17.d}, za.d[w10, 1, vgx2] // 11000000-00000110-01001000-00110000 +// CHECK-INST: mov { z16.d, z17.d }, za.d[w10, 1, vgx2] +// CHECK-ENCODING: [0x30,0x48,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064830 + +mov {z28.d, z29.d}, za.d[w8, 6, vgx2] // 11000000-00000110-00001000-11011100 +// CHECK-INST: mov { z28.d, z29.d }, za.d[w8, 6, vgx2] +// CHECK-ENCODING: [0xdc,0x08,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00608dc + +mov {z2.d, z3.d}, za.d[w11, 1, vgx2] // 11000000-00000110-01101000-00100010 +// CHECK-INST: mov { z2.d, z3.d }, za.d[w11, 1, vgx2] +// CHECK-ENCODING: [0x22,0x68,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066822 + +mov {z6.d, z7.d}, za.d[w9, 4, vgx2] // 11000000-00000110-00101000-10000110 +// CHECK-INST: mov { z6.d, z7.d }, za.d[w9, 4, vgx2] +// CHECK-ENCODING: [0x86,0x28,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062886 + + +mova za0h.d[w12, 0:1], {z0.d, z1.d} // 
11000000-11000100-00000000-00000000 +// CHECK-INST: mov za0h.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x00,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40000 + +mova za5h.d[w14, 0:1], {z10.d, z11.d} // 11000000-11000100-01000001-01000101 +// CHECK-INST: mov za5h.d[w14, 0:1], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0x41,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44145 + +mova za7h.d[w15, 0:1], {z12.d, z13.d} // 11000000-11000100-01100001-10000111 +// CHECK-INST: mov za7h.d[w15, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x61,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46187 + +mova za7h.d[w15, 0:1], {z30.d, z31.d} // 11000000-11000100-01100011-11000111 +// CHECK-INST: mov za7h.d[w15, 0:1], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0x63,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c463c7 + +mova za5h.d[w12, 0:1], {z16.d, z17.d} // 11000000-11000100-00000010-00000101 +// CHECK-INST: mov za5h.d[w12, 0:1], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x02,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40205 + +mova za1h.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-00000000-00000001 +// CHECK-INST: mov za1h.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x00,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40001 + +mova za0h.d[w14, 0:1], {z18.d, z19.d} // 11000000-11000100-01000010-01000000 +// CHECK-INST: mov za0h.d[w14, 0:1], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0x42,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44240 + +mova za0h.d[w12, 0:1], {z12.d, z13.d} // 11000000-11000100-00000001-10000000 +// CHECK-INST: mov za0h.d[w12, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x01,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40180 + +mova za1h.d[w14, 0:1], {z0.d, z1.d} // 
11000000-11000100-01000000-00000001 +// CHECK-INST: mov za1h.d[w14, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x40,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44001 + +mova za5h.d[w12, 0:1], {z22.d, z23.d} // 11000000-11000100-00000010-11000101 +// CHECK-INST: mov za5h.d[w12, 0:1], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x02,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c402c5 + +mova za2h.d[w15, 0:1], {z8.d, z9.d} // 11000000-11000100-01100001-00000010 +// CHECK-INST: mov za2h.d[w15, 0:1], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0x61,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46102 + +mova za7h.d[w13, 0:1], {z12.d, z13.d} // 11000000-11000100-00100001-10000111 +// CHECK-INST: mov za7h.d[w13, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x21,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c42187 + +// Aliases + +mov za0h.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-00000000-00000000 +// CHECK-INST: mov za0h.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x00,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40000 + +mov za5h.d[w14, 0:1], {z10.d, z11.d} // 11000000-11000100-01000001-01000101 +// CHECK-INST: mov za5h.d[w14, 0:1], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0x41,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44145 + +mov za7h.d[w15, 0:1], {z12.d, z13.d} // 11000000-11000100-01100001-10000111 +// CHECK-INST: mov za7h.d[w15, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x61,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46187 + +mov za7h.d[w15, 0:1], {z30.d, z31.d} // 11000000-11000100-01100011-11000111 +// CHECK-INST: mov za7h.d[w15, 0:1], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0x63,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c463c7 + +mov za5h.d[w12, 0:1], {z16.d, z17.d} // 
11000000-11000100-00000010-00000101 +// CHECK-INST: mov za5h.d[w12, 0:1], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x02,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40205 + +mov za1h.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-00000000-00000001 +// CHECK-INST: mov za1h.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x00,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40001 + +mov za0h.d[w14, 0:1], {z18.d, z19.d} // 11000000-11000100-01000010-01000000 +// CHECK-INST: mov za0h.d[w14, 0:1], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0x42,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44240 + +mov za0h.d[w12, 0:1], {z12.d, z13.d} // 11000000-11000100-00000001-10000000 +// CHECK-INST: mov za0h.d[w12, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x01,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40180 + +mov za1h.d[w14, 0:1], {z0.d, z1.d} // 11000000-11000100-01000000-00000001 +// CHECK-INST: mov za1h.d[w14, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x40,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44001 + +mov za5h.d[w12, 0:1], {z22.d, z23.d} // 11000000-11000100-00000010-11000101 +// CHECK-INST: mov za5h.d[w12, 0:1], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x02,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c402c5 + +mov za2h.d[w15, 0:1], {z8.d, z9.d} // 11000000-11000100-01100001-00000010 +// CHECK-INST: mov za2h.d[w15, 0:1], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0x61,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46102 + +mov za7h.d[w13, 0:1], {z12.d, z13.d} // 11000000-11000100-00100001-10000111 +// CHECK-INST: mov za7h.d[w13, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x21,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c42187 + + +mova za0v.d[w12, 0:1], {z0.d, z1.d} // 
11000000-11000100-10000000-00000000 +// CHECK-INST: mov za0v.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x80,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48000 + +mova za5v.d[w14, 0:1], {z10.d, z11.d} // 11000000-11000100-11000001-01000101 +// CHECK-INST: mov za5v.d[w14, 0:1], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0xc1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c145 + +mova za7v.d[w15, 0:1], {z12.d, z13.d} // 11000000-11000100-11100001-10000111 +// CHECK-INST: mov za7v.d[w15, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0xe1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e187 + +mova za7v.d[w15, 0:1], {z30.d, z31.d} // 11000000-11000100-11100011-11000111 +// CHECK-INST: mov za7v.d[w15, 0:1], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0xe3,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e3c7 + +mova za5v.d[w12, 0:1], {z16.d, z17.d} // 11000000-11000100-10000010-00000101 +// CHECK-INST: mov za5v.d[w12, 0:1], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x82,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48205 + +mova za1v.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-10000000-00000001 +// CHECK-INST: mov za1v.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x80,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48001 + +mova za0v.d[w14, 0:1], {z18.d, z19.d} // 11000000-11000100-11000010-01000000 +// CHECK-INST: mov za0v.d[w14, 0:1], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0xc2,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c240 + +mova za0v.d[w12, 0:1], {z12.d, z13.d} // 11000000-11000100-10000001-10000000 +// CHECK-INST: mov za0v.d[w12, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x81,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48180 + +mova za1v.d[w14, 0:1], {z0.d, z1.d} // 
11000000-11000100-11000000-00000001 +// CHECK-INST: mov za1v.d[w14, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0xc0,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c001 + +mova za5v.d[w12, 0:1], {z22.d, z23.d} // 11000000-11000100-10000010-11000101 +// CHECK-INST: mov za5v.d[w12, 0:1], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x82,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c482c5 + +mova za2v.d[w15, 0:1], {z8.d, z9.d} // 11000000-11000100-11100001-00000010 +// CHECK-INST: mov za2v.d[w15, 0:1], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0xe1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e102 + +mova za7v.d[w13, 0:1], {z12.d, z13.d} // 11000000-11000100-10100001-10000111 +// CHECK-INST: mov za7v.d[w13, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0xa1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4a187 + +// Aliases + +mov za0v.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-10000000-00000000 +// CHECK-INST: mov za0v.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x80,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48000 + +mov za5v.d[w14, 0:1], {z10.d, z11.d} // 11000000-11000100-11000001-01000101 +// CHECK-INST: mov za5v.d[w14, 0:1], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0xc1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c145 + +mov za7v.d[w15, 0:1], {z12.d, z13.d} // 11000000-11000100-11100001-10000111 +// CHECK-INST: mov za7v.d[w15, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0xe1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e187 + +mov za7v.d[w15, 0:1], {z30.d, z31.d} // 11000000-11000100-11100011-11000111 +// CHECK-INST: mov za7v.d[w15, 0:1], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0xe3,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e3c7 + +mov za5v.d[w12, 0:1], {z16.d, z17.d} // 
11000000-11000100-10000010-00000101 +// CHECK-INST: mov za5v.d[w12, 0:1], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x82,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48205 + +mov za1v.d[w12, 0:1], {z0.d, z1.d} // 11000000-11000100-10000000-00000001 +// CHECK-INST: mov za1v.d[w12, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x80,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48001 + +mov za0v.d[w14, 0:1], {z18.d, z19.d} // 11000000-11000100-11000010-01000000 +// CHECK-INST: mov za0v.d[w14, 0:1], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0xc2,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c240 + +mov za0v.d[w12, 0:1], {z12.d, z13.d} // 11000000-11000100-10000001-10000000 +// CHECK-INST: mov za0v.d[w12, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x81,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48180 + +mov za1v.d[w14, 0:1], {z0.d, z1.d} // 11000000-11000100-11000000-00000001 +// CHECK-INST: mov za1v.d[w14, 0:1], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0xc0,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c001 + +mov za5v.d[w12, 0:1], {z22.d, z23.d} // 11000000-11000100-10000010-11000101 +// CHECK-INST: mov za5v.d[w12, 0:1], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x82,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c482c5 + +mov za2v.d[w15, 0:1], {z8.d, z9.d} // 11000000-11000100-11100001-00000010 +// CHECK-INST: mov za2v.d[w15, 0:1], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0xe1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e102 + +mov za7v.d[w13, 0:1], {z12.d, z13.d} // 11000000-11000100-10100001-10000111 +// CHECK-INST: mov za7v.d[w13, 0:1], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0xa1,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4a187 + + +mova za.d[w8, 0, vgx2], {z0.d, z1.d} // 
11000000-00000100-00001000-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040800 + +mova za.d[w8, 0], {z0.d, z1.d} // 11000000-00000100-00001000-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040800 + +mova za.d[w10, 5, vgx2], {z10.d, z11.d} // 11000000-00000100-01001001-01000101 +// CHECK-INST: mov za.d[w10, 5, vgx2], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0x49,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044945 + +mova za.d[w10, 5], {z10.d, z11.d} // 11000000-00000100-01001001-01000101 +// CHECK-INST: mov za.d[w10, 5, vgx2], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0x49,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044945 + +mova za.d[w11, 7, vgx2], {z12.d, z13.d} // 11000000-00000100-01101001-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046987 + +mova za.d[w11, 7], {z12.d, z13.d} // 11000000-00000100-01101001-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046987 + +mova za.d[w11, 7, vgx2], {z30.d, z31.d} // 11000000-00000100-01101011-11000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0x6b,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046bc7 + +mova za.d[w11, 7], {z30.d, z31.d} // 11000000-00000100-01101011-11000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0x6b,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046bc7 + +mova za.d[w8, 5, vgx2], {z16.d, z17.d} // 
11000000-00000100-00001010-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040a05 + +mova za.d[w8, 5], {z16.d, z17.d} // 11000000-00000100-00001010-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040a05 + +mova za.d[w8, 1, vgx2], {z0.d, z1.d} // 11000000-00000100-00001000-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040801 + +mova za.d[w8, 1], {z0.d, z1.d} // 11000000-00000100-00001000-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040801 + +mova za.d[w10, 0, vgx2], {z18.d, z19.d} // 11000000-00000100-01001010-01000000 +// CHECK-INST: mov za.d[w10, 0, vgx2], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0x4a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044a40 + +mova za.d[w10, 0], {z18.d, z19.d} // 11000000-00000100-01001010-01000000 +// CHECK-INST: mov za.d[w10, 0, vgx2], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0x4a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044a40 + +mova za.d[w8, 0, vgx2], {z12.d, z13.d} // 11000000-00000100-00001001-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x09,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040980 + +mova za.d[w8, 0], {z12.d, z13.d} // 11000000-00000100-00001001-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x09,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040980 + +mova za.d[w10, 1, vgx2], {z0.d, z1.d} // 
11000000-00000100-01001000-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x48,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044801 + +mova za.d[w10, 1], {z0.d, z1.d} // 11000000-00000100-01001000-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x48,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044801 + +mova za.d[w8, 5, vgx2], {z22.d, z23.d} // 11000000-00000100-00001010-11000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040ac5 + +mova za.d[w8, 5], {z22.d, z23.d} // 11000000-00000100-00001010-11000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040ac5 + +mova za.d[w11, 2, vgx2], {z8.d, z9.d} // 11000000-00000100-01101001-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx2], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046902 + +mova za.d[w11, 2], {z8.d, z9.d} // 11000000-00000100-01101001-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx2], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046902 + +mova za.d[w9, 7, vgx2], {z12.d, z13.d} // 11000000-00000100-00101001-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x29,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042987 + +mova za.d[w9, 7], {z12.d, z13.d} // 11000000-00000100-00101001-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x29,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042987 + +// Aliases + +mov za.d[w8, 0, vgx2], {z0.d, z1.d} // 
11000000-00000100-00001000-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040800 + +mov za.d[w10, 5, vgx2], {z10.d, z11.d} // 11000000-00000100-01001001-01000101 +// CHECK-INST: mov za.d[w10, 5, vgx2], { z10.d, z11.d } +// CHECK-ENCODING: [0x45,0x49,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044945 + +mov za.d[w11, 7, vgx2], {z12.d, z13.d} // 11000000-00000100-01101001-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046987 + +mov za.d[w11, 7, vgx2], {z30.d, z31.d} // 11000000-00000100-01101011-11000111 +// CHECK-INST: mov za.d[w11, 7, vgx2], { z30.d, z31.d } +// CHECK-ENCODING: [0xc7,0x6b,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046bc7 + +mov za.d[w8, 5, vgx2], {z16.d, z17.d} // 11000000-00000100-00001010-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z16.d, z17.d } +// CHECK-ENCODING: [0x05,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040a05 + +mov za.d[w8, 1, vgx2], {z0.d, z1.d} // 11000000-00000100-00001000-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x08,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040801 + +mov za.d[w10, 0, vgx2], {z18.d, z19.d} // 11000000-00000100-01001010-01000000 +// CHECK-INST: mov za.d[w10, 0, vgx2], { z18.d, z19.d } +// CHECK-ENCODING: [0x40,0x4a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044a40 + +mov za.d[w8, 0, vgx2], {z12.d, z13.d} // 11000000-00000100-00001001-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x80,0x09,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040980 + +mov za.d[w10, 1, vgx2], {z0.d, z1.d} 
// 11000000-00000100-01001000-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx2], { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0x48,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044801 + +mov za.d[w8, 5, vgx2], {z22.d, z23.d} // 11000000-00000100-00001010-11000101 +// CHECK-INST: mov za.d[w8, 5, vgx2], { z22.d, z23.d } +// CHECK-ENCODING: [0xc5,0x0a,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040ac5 + +mov za.d[w11, 2, vgx2], {z8.d, z9.d} // 11000000-00000100-01101001-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx2], { z8.d, z9.d } +// CHECK-ENCODING: [0x02,0x69,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046902 + +mov za.d[w9, 7, vgx2], {z12.d, z13.d} // 11000000-00000100-00101001-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx2], { z12.d, z13.d } +// CHECK-ENCODING: [0x87,0x29,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042987 + + +mova {z0.b, z1.b}, za0h.b[w12, 0:1] // 11000000-00000110-00000000-00000000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060000 + +mova {z20.b, z21.b}, za0h.b[w14, 4:5] // 11000000-00000110-01000000-01010100 +// CHECK-INST: mov { z20.b, z21.b }, za0h.b[w14, 4:5] +// CHECK-ENCODING: [0x54,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064054 + +mova {z22.b, z23.b}, za0h.b[w15, 10:11] // 11000000-00000110-01100000-10110110 +// CHECK-INST: mov { z22.b, z23.b }, za0h.b[w15, 10:11] +// CHECK-ENCODING: [0xb6,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00660b6 + +mova {z30.b, z31.b}, za0h.b[w15, 14:15] // 11000000-00000110-01100000-11111110 +// CHECK-INST: mov { z30.b, z31.b }, za0h.b[w15, 14:15] +// CHECK-ENCODING: [0xfe,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00660fe + +mova {z4.b, z5.b}, za0h.b[w12, 2:3] // 
11000000-00000110-00000000-00100100 +// CHECK-INST: mov { z4.b, z5.b }, za0h.b[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060024 + +mova {z0.b, z1.b}, za0h.b[w12, 2:3] // 11000000-00000110-00000000-00100000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060020 + +mova {z24.b, z25.b}, za0h.b[w14, 6:7] // 11000000-00000110-01000000-01111000 +// CHECK-INST: mov { z24.b, z25.b }, za0h.b[w14, 6:7] +// CHECK-ENCODING: [0x78,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064078 + +mova {z0.b, z1.b}, za0h.b[w12, 8:9] // 11000000-00000110-00000000-10000000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 8:9] +// CHECK-ENCODING: [0x80,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060080 + +mova {z16.b, z17.b}, za0h.b[w14, 2:3] // 11000000-00000110-01000000-00110000 +// CHECK-INST: mov { z16.b, z17.b }, za0h.b[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064030 + +mova {z28.b, z29.b}, za0h.b[w12, 12:13] // 11000000-00000110-00000000-11011100 +// CHECK-INST: mov { z28.b, z29.b }, za0h.b[w12, 12:13] +// CHECK-ENCODING: [0xdc,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00600dc + +mova {z2.b, z3.b}, za0h.b[w15, 2:3] // 11000000-00000110-01100000-00100010 +// CHECK-INST: mov { z2.b, z3.b }, za0h.b[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066022 + +mova {z6.b, z7.b}, za0h.b[w13, 8:9] // 11000000-00000110-00100000-10000110 +// CHECK-INST: mov { z6.b, z7.b }, za0h.b[w13, 8:9] +// CHECK-ENCODING: [0x86,0x20,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062086 + +// Aliases + +mov {z0.b, z1.b}, za0h.b[w12, 0:1] // 
11000000-00000110-00000000-00000000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 0:1] +// CHECK-ENCODING: [0x00,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060000 + +mov {z20.b, z21.b}, za0h.b[w14, 4:5] // 11000000-00000110-01000000-01010100 +// CHECK-INST: mov { z20.b, z21.b }, za0h.b[w14, 4:5] +// CHECK-ENCODING: [0x54,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064054 + +mov {z22.b, z23.b}, za0h.b[w15, 10:11] // 11000000-00000110-01100000-10110110 +// CHECK-INST: mov { z22.b, z23.b }, za0h.b[w15, 10:11] +// CHECK-ENCODING: [0xb6,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00660b6 + +mov {z30.b, z31.b}, za0h.b[w15, 14:15] // 11000000-00000110-01100000-11111110 +// CHECK-INST: mov { z30.b, z31.b }, za0h.b[w15, 14:15] +// CHECK-ENCODING: [0xfe,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00660fe + +mov {z4.b, z5.b}, za0h.b[w12, 2:3] // 11000000-00000110-00000000-00100100 +// CHECK-INST: mov { z4.b, z5.b }, za0h.b[w12, 2:3] +// CHECK-ENCODING: [0x24,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060024 + +mov {z0.b, z1.b}, za0h.b[w12, 2:3] // 11000000-00000110-00000000-00100000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 2:3] +// CHECK-ENCODING: [0x20,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060020 + +mov {z24.b, z25.b}, za0h.b[w14, 6:7] // 11000000-00000110-01000000-01111000 +// CHECK-INST: mov { z24.b, z25.b }, za0h.b[w14, 6:7] +// CHECK-ENCODING: [0x78,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064078 + +mov {z0.b, z1.b}, za0h.b[w12, 8:9] // 11000000-00000110-00000000-10000000 +// CHECK-INST: mov { z0.b, z1.b }, za0h.b[w12, 8:9] +// CHECK-ENCODING: [0x80,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060080 + +mov {z16.b, z17.b}, za0h.b[w14, 2:3] // 
11000000-00000110-01000000-00110000 +// CHECK-INST: mov { z16.b, z17.b }, za0h.b[w14, 2:3] +// CHECK-ENCODING: [0x30,0x40,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064030 + +mov {z28.b, z29.b}, za0h.b[w12, 12:13] // 11000000-00000110-00000000-11011100 +// CHECK-INST: mov { z28.b, z29.b }, za0h.b[w12, 12:13] +// CHECK-ENCODING: [0xdc,0x00,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00600dc + +mov {z2.b, z3.b}, za0h.b[w15, 2:3] // 11000000-00000110-01100000-00100010 +// CHECK-INST: mov { z2.b, z3.b }, za0h.b[w15, 2:3] +// CHECK-ENCODING: [0x22,0x60,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066022 + +mov {z6.b, z7.b}, za0h.b[w13, 8:9] // 11000000-00000110-00100000-10000110 +// CHECK-INST: mov { z6.b, z7.b }, za0h.b[w13, 8:9] +// CHECK-ENCODING: [0x86,0x20,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062086 + + +mova {z0.b, z1.b}, za0v.b[w12, 0:1] // 11000000-00000110-10000000-00000000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068000 + +mova {z20.b, z21.b}, za0v.b[w14, 4:5] // 11000000-00000110-11000000-01010100 +// CHECK-INST: mov { z20.b, z21.b }, za0v.b[w14, 4:5] +// CHECK-ENCODING: [0x54,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c054 + +mova {z22.b, z23.b}, za0v.b[w15, 10:11] // 11000000-00000110-11100000-10110110 +// CHECK-INST: mov { z22.b, z23.b }, za0v.b[w15, 10:11] +// CHECK-ENCODING: [0xb6,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e0b6 + +mova {z30.b, z31.b}, za0v.b[w15, 14:15] // 11000000-00000110-11100000-11111110 +// CHECK-INST: mov { z30.b, z31.b }, za0v.b[w15, 14:15] +// CHECK-ENCODING: [0xfe,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e0fe + +mova {z4.b, z5.b}, za0v.b[w12, 2:3] // 
11000000-00000110-10000000-00100100 +// CHECK-INST: mov { z4.b, z5.b }, za0v.b[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068024 + +mova {z0.b, z1.b}, za0v.b[w12, 2:3] // 11000000-00000110-10000000-00100000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068020 + +mova {z24.b, z25.b}, za0v.b[w14, 6:7] // 11000000-00000110-11000000-01111000 +// CHECK-INST: mov { z24.b, z25.b }, za0v.b[w14, 6:7] +// CHECK-ENCODING: [0x78,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c078 + +mova {z0.b, z1.b}, za0v.b[w12, 8:9] // 11000000-00000110-10000000-10000000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 8:9] +// CHECK-ENCODING: [0x80,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068080 + +mova {z16.b, z17.b}, za0v.b[w14, 2:3] // 11000000-00000110-11000000-00110000 +// CHECK-INST: mov { z16.b, z17.b }, za0v.b[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c030 + +mova {z28.b, z29.b}, za0v.b[w12, 12:13] // 11000000-00000110-10000000-11011100 +// CHECK-INST: mov { z28.b, z29.b }, za0v.b[w12, 12:13] +// CHECK-ENCODING: [0xdc,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00680dc + +mova {z2.b, z3.b}, za0v.b[w15, 2:3] // 11000000-00000110-11100000-00100010 +// CHECK-INST: mov { z2.b, z3.b }, za0v.b[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e022 + +mova {z6.b, z7.b}, za0v.b[w13, 8:9] // 11000000-00000110-10100000-10000110 +// CHECK-INST: mov { z6.b, z7.b }, za0v.b[w13, 8:9] +// CHECK-ENCODING: [0x86,0xa0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006a086 + +// Aliases + +mov {z0.b, z1.b}, za0v.b[w12, 0:1] // 
11000000-00000110-10000000-00000000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 0:1] +// CHECK-ENCODING: [0x00,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068000 + +mov {z20.b, z21.b}, za0v.b[w14, 4:5] // 11000000-00000110-11000000-01010100 +// CHECK-INST: mov { z20.b, z21.b }, za0v.b[w14, 4:5] +// CHECK-ENCODING: [0x54,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c054 + +mov {z22.b, z23.b}, za0v.b[w15, 10:11] // 11000000-00000110-11100000-10110110 +// CHECK-INST: mov { z22.b, z23.b }, za0v.b[w15, 10:11] +// CHECK-ENCODING: [0xb6,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e0b6 + +mov {z30.b, z31.b}, za0v.b[w15, 14:15] // 11000000-00000110-11100000-11111110 +// CHECK-INST: mov { z30.b, z31.b }, za0v.b[w15, 14:15] +// CHECK-ENCODING: [0xfe,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e0fe + +mov {z4.b, z5.b}, za0v.b[w12, 2:3] // 11000000-00000110-10000000-00100100 +// CHECK-INST: mov { z4.b, z5.b }, za0v.b[w12, 2:3] +// CHECK-ENCODING: [0x24,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068024 + +mov {z0.b, z1.b}, za0v.b[w12, 2:3] // 11000000-00000110-10000000-00100000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 2:3] +// CHECK-ENCODING: [0x20,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068020 + +mov {z24.b, z25.b}, za0v.b[w14, 6:7] // 11000000-00000110-11000000-01111000 +// CHECK-INST: mov { z24.b, z25.b }, za0v.b[w14, 6:7] +// CHECK-ENCODING: [0x78,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c078 + +mov {z0.b, z1.b}, za0v.b[w12, 8:9] // 11000000-00000110-10000000-10000000 +// CHECK-INST: mov { z0.b, z1.b }, za0v.b[w12, 8:9] +// CHECK-ENCODING: [0x80,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068080 + +mov {z16.b, z17.b}, za0v.b[w14, 2:3] // 
11000000-00000110-11000000-00110000 +// CHECK-INST: mov { z16.b, z17.b }, za0v.b[w14, 2:3] +// CHECK-ENCODING: [0x30,0xc0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c030 + +mov {z28.b, z29.b}, za0v.b[w12, 12:13] // 11000000-00000110-10000000-11011100 +// CHECK-INST: mov { z28.b, z29.b }, za0v.b[w12, 12:13] +// CHECK-ENCODING: [0xdc,0x80,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00680dc + +mov {z2.b, z3.b}, za0v.b[w15, 2:3] // 11000000-00000110-11100000-00100010 +// CHECK-INST: mov { z2.b, z3.b }, za0v.b[w15, 2:3] +// CHECK-ENCODING: [0x22,0xe0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e022 + +mov {z6.b, z7.b}, za0v.b[w13, 8:9] // 11000000-00000110-10100000-10000110 +// CHECK-INST: mov { z6.b, z7.b }, za0v.b[w13, 8:9] +// CHECK-ENCODING: [0x86,0xa0,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006a086 + + +mova za0h.b[w12, 0:1], {z0.b, z1.b} // 11000000-00000100-00000000-00000000 +// CHECK-INST: mov za0h.b[w12, 0:1], { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040000 + +mova za0h.b[w14, 10:11], {z10.b, z11.b} // 11000000-00000100-01000001-01000101 +// CHECK-INST: mov za0h.b[w14, 10:11], { z10.b, z11.b } +// CHECK-ENCODING: [0x45,0x41,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044145 + +mova za0h.b[w15, 14:15], {z12.b, z13.b} // 11000000-00000100-01100001-10000111 +// CHECK-INST: mov za0h.b[w15, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0x61,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046187 + +mova za0h.b[w15, 14:15], {z30.b, z31.b} // 11000000-00000100-01100011-11000111 +// CHECK-INST: mov za0h.b[w15, 14:15], { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0x63,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00463c7 + +mova za0h.b[w12, 10:11], {z16.b, z17.b} // 
11000000-00000100-00000010-00000101 +// CHECK-INST: mov za0h.b[w12, 10:11], { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040205 + +mova za0h.b[w12, 2:3], {z0.b, z1.b} // 11000000-00000100-00000000-00000001 +// CHECK-INST: mov za0h.b[w12, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x00,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040001 + +mova za0h.b[w14, 0:1], {z18.b, z19.b} // 11000000-00000100-01000010-01000000 +// CHECK-INST: mov za0h.b[w14, 0:1], { z18.b, z19.b } +// CHECK-ENCODING: [0x40,0x42,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044240 + +mova za0h.b[w12, 0:1], {z12.b, z13.b} // 11000000-00000100-00000001-10000000 +// CHECK-INST: mov za0h.b[w12, 0:1], { z12.b, z13.b } +// CHECK-ENCODING: [0x80,0x01,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040180 + +mova za0h.b[w14, 2:3], {z0.b, z1.b} // 11000000-00000100-01000000-00000001 +// CHECK-INST: mov za0h.b[w14, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x40,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044001 + +mova za0h.b[w12, 10:11], {z22.b, z23.b} // 11000000-00000100-00000010-11000101 +// CHECK-INST: mov za0h.b[w12, 10:11], { z22.b, z23.b } +// CHECK-ENCODING: [0xc5,0x02,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00402c5 + +mova za0h.b[w15, 4:5], {z8.b, z9.b} // 11000000-00000100-01100001-00000010 +// CHECK-INST: mov za0h.b[w15, 4:5], { z8.b, z9.b } +// CHECK-ENCODING: [0x02,0x61,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046102 + +mova za0h.b[w13, 14:15], {z12.b, z13.b} // 11000000-00000100-00100001-10000111 +// CHECK-INST: mov za0h.b[w13, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0x21,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042187 + +// Aliases + +mov za0h.b[w12, 0:1], {z0.b, z1.b} 
// 11000000-00000100-00000000-00000000 +// CHECK-INST: mov za0h.b[w12, 0:1], { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040000 + +mov za0h.b[w14, 10:11], {z10.b, z11.b} // 11000000-00000100-01000001-01000101 +// CHECK-INST: mov za0h.b[w14, 10:11], { z10.b, z11.b } +// CHECK-ENCODING: [0x45,0x41,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044145 + +mov za0h.b[w15, 14:15], {z12.b, z13.b} // 11000000-00000100-01100001-10000111 +// CHECK-INST: mov za0h.b[w15, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0x61,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046187 + +mov za0h.b[w15, 14:15], {z30.b, z31.b} // 11000000-00000100-01100011-11000111 +// CHECK-INST: mov za0h.b[w15, 14:15], { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0x63,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00463c7 + +mov za0h.b[w12, 10:11], {z16.b, z17.b} // 11000000-00000100-00000010-00000101 +// CHECK-INST: mov za0h.b[w12, 10:11], { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040205 + +mov za0h.b[w12, 2:3], {z0.b, z1.b} // 11000000-00000100-00000000-00000001 +// CHECK-INST: mov za0h.b[w12, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x00,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040001 + +mov za0h.b[w14, 0:1], {z18.b, z19.b} // 11000000-00000100-01000010-01000000 +// CHECK-INST: mov za0h.b[w14, 0:1], { z18.b, z19.b } +// CHECK-ENCODING: [0x40,0x42,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044240 + +mov za0h.b[w12, 0:1], {z12.b, z13.b} // 11000000-00000100-00000001-10000000 +// CHECK-INST: mov za0h.b[w12, 0:1], { z12.b, z13.b } +// CHECK-ENCODING: [0x80,0x01,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040180 + +mov za0h.b[w14, 2:3], {z0.b, z1.b} // 
11000000-00000100-01000000-00000001 +// CHECK-INST: mov za0h.b[w14, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x40,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044001 + +mov za0h.b[w12, 10:11], {z22.b, z23.b} // 11000000-00000100-00000010-11000101 +// CHECK-INST: mov za0h.b[w12, 10:11], { z22.b, z23.b } +// CHECK-ENCODING: [0xc5,0x02,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00402c5 + +mov za0h.b[w15, 4:5], {z8.b, z9.b} // 11000000-00000100-01100001-00000010 +// CHECK-INST: mov za0h.b[w15, 4:5], { z8.b, z9.b } +// CHECK-ENCODING: [0x02,0x61,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046102 + +mov za0h.b[w13, 14:15], {z12.b, z13.b} // 11000000-00000100-00100001-10000111 +// CHECK-INST: mov za0h.b[w13, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0x21,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042187 + + +mova za0v.b[w12, 0:1], {z0.b, z1.b} // 11000000-00000100-10000000-00000000 +// CHECK-INST: mov za0v.b[w12, 0:1], { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x80,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048000 + +mova za0v.b[w14, 10:11], {z10.b, z11.b} // 11000000-00000100-11000001-01000101 +// CHECK-INST: mov za0v.b[w14, 10:11], { z10.b, z11.b } +// CHECK-ENCODING: [0x45,0xc1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c145 + +mova za0v.b[w15, 14:15], {z12.b, z13.b} // 11000000-00000100-11100001-10000111 +// CHECK-INST: mov za0v.b[w15, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0xe1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e187 + +mova za0v.b[w15, 14:15], {z30.b, z31.b} // 11000000-00000100-11100011-11000111 +// CHECK-INST: mov za0v.b[w15, 14:15], { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0xe3,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e3c7 + +mova za0v.b[w12, 10:11], {z16.b, z17.b} 
// 11000000-00000100-10000010-00000101 +// CHECK-INST: mov za0v.b[w12, 10:11], { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x82,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048205 + +mova za0v.b[w12, 2:3], {z0.b, z1.b} // 11000000-00000100-10000000-00000001 +// CHECK-INST: mov za0v.b[w12, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x80,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048001 + +mova za0v.b[w14, 0:1], {z18.b, z19.b} // 11000000-00000100-11000010-01000000 +// CHECK-INST: mov za0v.b[w14, 0:1], { z18.b, z19.b } +// CHECK-ENCODING: [0x40,0xc2,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c240 + +mova za0v.b[w12, 0:1], {z12.b, z13.b} // 11000000-00000100-10000001-10000000 +// CHECK-INST: mov za0v.b[w12, 0:1], { z12.b, z13.b } +// CHECK-ENCODING: [0x80,0x81,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048180 + +mova za0v.b[w14, 2:3], {z0.b, z1.b} // 11000000-00000100-11000000-00000001 +// CHECK-INST: mov za0v.b[w14, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xc0,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c001 + +mova za0v.b[w12, 10:11], {z22.b, z23.b} // 11000000-00000100-10000010-11000101 +// CHECK-INST: mov za0v.b[w12, 10:11], { z22.b, z23.b } +// CHECK-ENCODING: [0xc5,0x82,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00482c5 + +mova za0v.b[w15, 4:5], {z8.b, z9.b} // 11000000-00000100-11100001-00000010 +// CHECK-INST: mov za0v.b[w15, 4:5], { z8.b, z9.b } +// CHECK-ENCODING: [0x02,0xe1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e102 + +mova za0v.b[w13, 14:15], {z12.b, z13.b} // 11000000-00000100-10100001-10000111 +// CHECK-INST: mov za0v.b[w13, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0xa1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004a187 + +// Aliases + +mov za0v.b[w12, 0:1], {z0.b, 
z1.b} // 11000000-00000100-10000000-00000000 +// CHECK-INST: mov za0v.b[w12, 0:1], { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x80,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048000 + +mov za0v.b[w14, 10:11], {z10.b, z11.b} // 11000000-00000100-11000001-01000101 +// CHECK-INST: mov za0v.b[w14, 10:11], { z10.b, z11.b } +// CHECK-ENCODING: [0x45,0xc1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c145 + +mov za0v.b[w15, 14:15], {z12.b, z13.b} // 11000000-00000100-11100001-10000111 +// CHECK-INST: mov za0v.b[w15, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0xe1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e187 + +mov za0v.b[w15, 14:15], {z30.b, z31.b} // 11000000-00000100-11100011-11000111 +// CHECK-INST: mov za0v.b[w15, 14:15], { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0xe3,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e3c7 + +mov za0v.b[w12, 10:11], {z16.b, z17.b} // 11000000-00000100-10000010-00000101 +// CHECK-INST: mov za0v.b[w12, 10:11], { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x82,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048205 + +mov za0v.b[w12, 2:3], {z0.b, z1.b} // 11000000-00000100-10000000-00000001 +// CHECK-INST: mov za0v.b[w12, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0x80,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048001 + +mov za0v.b[w14, 0:1], {z18.b, z19.b} // 11000000-00000100-11000010-01000000 +// CHECK-INST: mov za0v.b[w14, 0:1], { z18.b, z19.b } +// CHECK-ENCODING: [0x40,0xc2,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c240 + +mov za0v.b[w12, 0:1], {z12.b, z13.b} // 11000000-00000100-10000001-10000000 +// CHECK-INST: mov za0v.b[w12, 0:1], { z12.b, z13.b } +// CHECK-ENCODING: [0x80,0x81,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048180 + +mov za0v.b[w14, 2:3], {z0.b, z1.b} 
// 11000000-00000100-11000000-00000001 +// CHECK-INST: mov za0v.b[w14, 2:3], { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xc0,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c001 + +mov za0v.b[w12, 10:11], {z22.b, z23.b} // 11000000-00000100-10000010-11000101 +// CHECK-INST: mov za0v.b[w12, 10:11], { z22.b, z23.b } +// CHECK-ENCODING: [0xc5,0x82,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c00482c5 + +mov za0v.b[w15, 4:5], {z8.b, z9.b} // 11000000-00000100-11100001-00000010 +// CHECK-INST: mov za0v.b[w15, 4:5], { z8.b, z9.b } +// CHECK-ENCODING: [0x02,0xe1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e102 + +mov za0v.b[w13, 14:15], {z12.b, z13.b} // 11000000-00000100-10100001-10000111 +// CHECK-INST: mov za0v.b[w13, 14:15], { z12.b, z13.b } +// CHECK-ENCODING: [0x87,0xa1,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004a187 + + +mova {z0.h - z3.h}, za0h.h[w12, 0:3] // 11000000-01000110-00000100-00000000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460400 + +mova {z20.h - z23.h}, za1h.h[w14, 0:3] // 11000000-01000110-01000100-01010100 +// CHECK-INST: mov { z20.h - z23.h }, za1h.h[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464454 + +mova {z20.h - z23.h}, za0h.h[w15, 4:7] // 11000000-01000110-01100100-00110100 +// CHECK-INST: mov { z20.h - z23.h }, za0h.h[w15, 4:7] +// CHECK-ENCODING: [0x34,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466434 + +mova {z28.h - z31.h}, za1h.h[w15, 4:7] // 11000000-01000110-01100100-01111100 +// CHECK-INST: mov { z28.h - z31.h }, za1h.h[w15, 4:7] +// CHECK-ENCODING: [0x7c,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046647c + +mova {z4.h - z7.h}, za0h.h[w12, 4:7] // 
11000000-01000110-00000100-00100100 +// CHECK-INST: mov { z4.h - z7.h }, za0h.h[w12, 4:7] +// CHECK-ENCODING: [0x24,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460424 + +mova {z0.h - z3.h}, za0h.h[w12, 4:7] // 11000000-01000110-00000100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w12, 4:7] +// CHECK-ENCODING: [0x20,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460420 + +mova {z24.h - z27.h}, za1h.h[w14, 4:7] // 11000000-01000110-01000100-01111000 +// CHECK-INST: mov { z24.h - z27.h }, za1h.h[w14, 4:7] +// CHECK-ENCODING: [0x78,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464478 + +mova {z16.h - z19.h}, za0h.h[w14, 4:7] // 11000000-01000110-01000100-00110000 +// CHECK-INST: mov { z16.h - z19.h }, za0h.h[w14, 4:7] +// CHECK-ENCODING: [0x30,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464430 + +mova {z28.h - z31.h}, za1h.h[w12, 0:3] // 11000000-01000110-00000100-01011100 +// CHECK-INST: mov { z28.h - z31.h }, za1h.h[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046045c + +mova {z0.h - z3.h}, za0h.h[w15, 4:7] // 11000000-01000110-01100100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w15, 4:7] +// CHECK-ENCODING: [0x20,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466420 + +mova {z4.h - z7.h}, za0h.h[w13, 0:3] // 11000000-01000110-00100100-00000100 +// CHECK-INST: mov { z4.h - z7.h }, za0h.h[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0462404 + +// Aliases + +mov {z0.h - z3.h}, za0h.h[w12, 0:3] // 11000000-01000110-00000100-00000000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460400 + +mov {z20.h - z23.h}, za1h.h[w14, 0:3] 
// 11000000-01000110-01000100-01010100 +// CHECK-INST: mov { z20.h - z23.h }, za1h.h[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464454 + +mov {z20.h - z23.h}, za0h.h[w15, 4:7] // 11000000-01000110-01100100-00110100 +// CHECK-INST: mov { z20.h - z23.h }, za0h.h[w15, 4:7] +// CHECK-ENCODING: [0x34,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466434 + +mov {z28.h - z31.h}, za1h.h[w15, 4:7] // 11000000-01000110-01100100-01111100 +// CHECK-INST: mov { z28.h - z31.h }, za1h.h[w15, 4:7] +// CHECK-ENCODING: [0x7c,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046647c + +mov {z4.h - z7.h}, za0h.h[w12, 4:7] // 11000000-01000110-00000100-00100100 +// CHECK-INST: mov { z4.h - z7.h }, za0h.h[w12, 4:7] +// CHECK-ENCODING: [0x24,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460424 + +mov {z0.h - z3.h}, za0h.h[w12, 4:7] // 11000000-01000110-00000100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w12, 4:7] +// CHECK-ENCODING: [0x20,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0460420 + +mov {z24.h - z27.h}, za1h.h[w14, 4:7] // 11000000-01000110-01000100-01111000 +// CHECK-INST: mov { z24.h - z27.h }, za1h.h[w14, 4:7] +// CHECK-ENCODING: [0x78,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464478 + +mov {z16.h - z19.h}, za0h.h[w14, 4:7] // 11000000-01000110-01000100-00110000 +// CHECK-INST: mov { z16.h - z19.h }, za0h.h[w14, 4:7] +// CHECK-ENCODING: [0x30,0x44,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0464430 + +mov {z28.h - z31.h}, za1h.h[w12, 0:3] // 11000000-01000110-00000100-01011100 +// CHECK-INST: mov { z28.h - z31.h }, za1h.h[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x04,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046045c + +mov {z0.h - z3.h}, za0h.h[w15, 4:7] // 
11000000-01000110-01100100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0h.h[w15, 4:7] +// CHECK-ENCODING: [0x20,0x64,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0466420 + +mov {z4.h - z7.h}, za0h.h[w13, 0:3] // 11000000-01000110-00100100-00000100 +// CHECK-INST: mov { z4.h - z7.h }, za0h.h[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0462404 + + +mova {z0.h - z3.h}, za0v.h[w12, 0:3] // 11000000-01000110-10000100-00000000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468400 + +mova {z20.h - z23.h}, za1v.h[w14, 0:3] // 11000000-01000110-11000100-01010100 +// CHECK-INST: mov { z20.h - z23.h }, za1v.h[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c454 + +mova {z20.h - z23.h}, za0v.h[w15, 4:7] // 11000000-01000110-11100100-00110100 +// CHECK-INST: mov { z20.h - z23.h }, za0v.h[w15, 4:7] +// CHECK-ENCODING: [0x34,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e434 + +mova {z28.h - z31.h}, za1v.h[w15, 4:7] // 11000000-01000110-11100100-01111100 +// CHECK-INST: mov { z28.h - z31.h }, za1v.h[w15, 4:7] +// CHECK-ENCODING: [0x7c,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e47c + +mova {z4.h - z7.h}, za0v.h[w12, 4:7] // 11000000-01000110-10000100-00100100 +// CHECK-INST: mov { z4.h - z7.h }, za0v.h[w12, 4:7] +// CHECK-ENCODING: [0x24,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468424 + +mova {z0.h - z3.h}, za0v.h[w12, 4:7] // 11000000-01000110-10000100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w12, 4:7] +// CHECK-ENCODING: [0x20,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468420 + +mova {z24.h - z27.h}, za1v.h[w14, 4:7] // 
11000000-01000110-11000100-01111000 +// CHECK-INST: mov { z24.h - z27.h }, za1v.h[w14, 4:7] +// CHECK-ENCODING: [0x78,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c478 + +mova {z16.h - z19.h}, za0v.h[w14, 4:7] // 11000000-01000110-11000100-00110000 +// CHECK-INST: mov { z16.h - z19.h }, za0v.h[w14, 4:7] +// CHECK-ENCODING: [0x30,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c430 + +mova {z28.h - z31.h}, za1v.h[w12, 0:3] // 11000000-01000110-10000100-01011100 +// CHECK-INST: mov { z28.h - z31.h }, za1v.h[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046845c + +mova {z0.h - z3.h}, za0v.h[w15, 4:7] // 11000000-01000110-11100100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w15, 4:7] +// CHECK-ENCODING: [0x20,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e420 + +mova {z4.h - z7.h}, za0v.h[w13, 0:3] // 11000000-01000110-10100100-00000100 +// CHECK-INST: mov { z4.h - z7.h }, za0v.h[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046a404 + +// Aliases + +mov {z0.h - z3.h}, za0v.h[w12, 0:3] // 11000000-01000110-10000100-00000000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468400 + +mov {z20.h - z23.h}, za1v.h[w14, 0:3] // 11000000-01000110-11000100-01010100 +// CHECK-INST: mov { z20.h - z23.h }, za1v.h[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c454 + +mov {z20.h - z23.h}, za0v.h[w15, 4:7] // 11000000-01000110-11100100-00110100 +// CHECK-INST: mov { z20.h - z23.h }, za0v.h[w15, 4:7] +// CHECK-ENCODING: [0x34,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e434 + +mov {z28.h - z31.h}, za1v.h[w15, 
4:7] // 11000000-01000110-11100100-01111100 +// CHECK-INST: mov { z28.h - z31.h }, za1v.h[w15, 4:7] +// CHECK-ENCODING: [0x7c,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e47c + +mov {z4.h - z7.h}, za0v.h[w12, 4:7] // 11000000-01000110-10000100-00100100 +// CHECK-INST: mov { z4.h - z7.h }, za0v.h[w12, 4:7] +// CHECK-ENCODING: [0x24,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468424 + +mov {z0.h - z3.h}, za0v.h[w12, 4:7] // 11000000-01000110-10000100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w12, 4:7] +// CHECK-ENCODING: [0x20,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0468420 + +mov {z24.h - z27.h}, za1v.h[w14, 4:7] // 11000000-01000110-11000100-01111000 +// CHECK-INST: mov { z24.h - z27.h }, za1v.h[w14, 4:7] +// CHECK-ENCODING: [0x78,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c478 + +mov {z16.h - z19.h}, za0v.h[w14, 4:7] // 11000000-01000110-11000100-00110000 +// CHECK-INST: mov { z16.h - z19.h }, za0v.h[w14, 4:7] +// CHECK-ENCODING: [0x30,0xc4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046c430 + +mov {z28.h - z31.h}, za1v.h[w12, 0:3] // 11000000-01000110-10000100-01011100 +// CHECK-INST: mov { z28.h - z31.h }, za1v.h[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x84,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046845c + +mov {z0.h - z3.h}, za0v.h[w15, 4:7] // 11000000-01000110-11100100-00100000 +// CHECK-INST: mov { z0.h - z3.h }, za0v.h[w15, 4:7] +// CHECK-ENCODING: [0x20,0xe4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046e420 + +mov {z4.h - z7.h}, za0v.h[w13, 0:3] // 11000000-01000110-10100100-00000100 +// CHECK-INST: mov { z4.h - z7.h }, za0v.h[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x46,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c046a404 + + +mova za0h.h[w12, 0:3], {z0.h - z3.h} // 
11000000-01000100-00000100-00000000 +// CHECK-INST: mov za0h.h[w12, 0:3], { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x04,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440400 + +mova za0h.h[w14, 4:7], {z8.h - z11.h} // 11000000-01000100-01000101-00000001 +// CHECK-INST: mov za0h.h[w14, 4:7], { z8.h - z11.h } +// CHECK-ENCODING: [0x01,0x45,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444501 + +mova za1h.h[w15, 4:7], {z12.h - z15.h} // 11000000-01000100-01100101-10000011 +// CHECK-INST: mov za1h.h[w15, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0x65,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446583 + +mova za1h.h[w15, 4:7], {z28.h - z31.h} // 11000000-01000100-01100111-10000011 +// CHECK-INST: mov za1h.h[w15, 4:7], { z28.h - z31.h } +// CHECK-ENCODING: [0x83,0x67,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446783 + +mova za0h.h[w12, 4:7], {z16.h - z19.h} // 11000000-01000100-00000110-00000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x06,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440601 + +mova za0h.h[w12, 4:7], {z0.h - z3.h} // 11000000-01000100-00000100-00000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x04,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440401 + +mova za0h.h[w14, 0:3], {z16.h - z19.h} // 11000000-01000100-01000110-00000000 +// CHECK-INST: mov za0h.h[w14, 0:3], { z16.h - z19.h } +// CHECK-ENCODING: [0x00,0x46,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444600 + +mova za0h.h[w12, 0:3], {z12.h - z15.h} // 11000000-01000100-00000101-10000000 +// CHECK-INST: mov za0h.h[w12, 0:3], { z12.h - z15.h } +// CHECK-ENCODING: [0x80,0x05,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440580 + +mova za0h.h[w14, 4:7], {z0.h - z3.h} // 
11000000-01000100-01000100-00000001 +// CHECK-INST: mov za0h.h[w14, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x44,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444401 + +mova za0h.h[w12, 4:7], {z20.h - z23.h} // 11000000-01000100-00000110-10000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z20.h - z23.h } +// CHECK-ENCODING: [0x81,0x06,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440681 + +mova za1h.h[w15, 0:3], {z8.h - z11.h} // 11000000-01000100-01100101-00000010 +// CHECK-INST: mov za1h.h[w15, 0:3], { z8.h - z11.h } +// CHECK-ENCODING: [0x02,0x65,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446502 + +mova za1h.h[w13, 4:7], {z12.h - z15.h} // 11000000-01000100-00100101-10000011 +// CHECK-INST: mov za1h.h[w13, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0x25,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0442583 + +// Aliases + +mov za0h.h[w12, 0:3], {z0.h - z3.h} // 11000000-01000100-00000100-00000000 +// CHECK-INST: mov za0h.h[w12, 0:3], { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x04,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440400 + +mov za0h.h[w14, 4:7], {z8.h - z11.h} // 11000000-01000100-01000101-00000001 +// CHECK-INST: mov za0h.h[w14, 4:7], { z8.h - z11.h } +// CHECK-ENCODING: [0x01,0x45,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444501 + +mov za1h.h[w15, 4:7], {z12.h - z15.h} // 11000000-01000100-01100101-10000011 +// CHECK-INST: mov za1h.h[w15, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0x65,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446583 + +mov za1h.h[w15, 4:7], {z28.h - z31.h} // 11000000-01000100-01100111-10000011 +// CHECK-INST: mov za1h.h[w15, 4:7], { z28.h - z31.h } +// CHECK-ENCODING: [0x83,0x67,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446783 + +mov za0h.h[w12, 4:7], {z16.h - 
z19.h} // 11000000-01000100-00000110-00000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x06,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440601 + +mov za0h.h[w12, 4:7], {z0.h - z3.h} // 11000000-01000100-00000100-00000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x04,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440401 + +mov za0h.h[w14, 0:3], {z16.h - z19.h} // 11000000-01000100-01000110-00000000 +// CHECK-INST: mov za0h.h[w14, 0:3], { z16.h - z19.h } +// CHECK-ENCODING: [0x00,0x46,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444600 + +mov za0h.h[w12, 0:3], {z12.h - z15.h} // 11000000-01000100-00000101-10000000 +// CHECK-INST: mov za0h.h[w12, 0:3], { z12.h - z15.h } +// CHECK-ENCODING: [0x80,0x05,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440580 + +mov za0h.h[w14, 4:7], {z0.h - z3.h} // 11000000-01000100-01000100-00000001 +// CHECK-INST: mov za0h.h[w14, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x44,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0444401 + +mov za0h.h[w12, 4:7], {z20.h - z23.h} // 11000000-01000100-00000110-10000001 +// CHECK-INST: mov za0h.h[w12, 4:7], { z20.h - z23.h } +// CHECK-ENCODING: [0x81,0x06,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0440681 + +mov za1h.h[w15, 0:3], {z8.h - z11.h} // 11000000-01000100-01100101-00000010 +// CHECK-INST: mov za1h.h[w15, 0:3], { z8.h - z11.h } +// CHECK-ENCODING: [0x02,0x65,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0446502 + +mov za1h.h[w13, 4:7], {z12.h - z15.h} // 11000000-01000100-00100101-10000011 +// CHECK-INST: mov za1h.h[w13, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0x25,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0442583 + + +mova za0v.h[w12, 0:3], {z0.h - z3.h} 
// 11000000-01000100-10000100-00000000 +// CHECK-INST: mov za0v.h[w12, 0:3], { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x84,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448400 + +mova za0v.h[w14, 4:7], {z8.h - z11.h} // 11000000-01000100-11000101-00000001 +// CHECK-INST: mov za0v.h[w14, 4:7], { z8.h - z11.h } +// CHECK-ENCODING: [0x01,0xc5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c501 + +mova za1v.h[w15, 4:7], {z12.h - z15.h} // 11000000-01000100-11100101-10000011 +// CHECK-INST: mov za1v.h[w15, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0xe5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e583 + +mova za1v.h[w15, 4:7], {z28.h - z31.h} // 11000000-01000100-11100111-10000011 +// CHECK-INST: mov za1v.h[w15, 4:7], { z28.h - z31.h } +// CHECK-ENCODING: [0x83,0xe7,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e783 + +mova za0v.h[w12, 4:7], {z16.h - z19.h} // 11000000-01000100-10000110-00000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x86,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448601 + +mova za0v.h[w12, 4:7], {z0.h - z3.h} // 11000000-01000100-10000100-00000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x84,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448401 + +mova za0v.h[w14, 0:3], {z16.h - z19.h} // 11000000-01000100-11000110-00000000 +// CHECK-INST: mov za0v.h[w14, 0:3], { z16.h - z19.h } +// CHECK-ENCODING: [0x00,0xc6,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c600 + +mova za0v.h[w12, 0:3], {z12.h - z15.h} // 11000000-01000100-10000101-10000000 +// CHECK-INST: mov za0v.h[w12, 0:3], { z12.h - z15.h } +// CHECK-ENCODING: [0x80,0x85,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448580 + +mova za0v.h[w14, 4:7], {z0.h - z3.h} 
// 11000000-01000100-11000100-00000001 +// CHECK-INST: mov za0v.h[w14, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0xc4,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c401 + +mova za0v.h[w12, 4:7], {z20.h - z23.h} // 11000000-01000100-10000110-10000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z20.h - z23.h } +// CHECK-ENCODING: [0x81,0x86,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448681 + +mova za1v.h[w15, 0:3], {z8.h - z11.h} // 11000000-01000100-11100101-00000010 +// CHECK-INST: mov za1v.h[w15, 0:3], { z8.h - z11.h } +// CHECK-ENCODING: [0x02,0xe5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e502 + +mova za1v.h[w13, 4:7], {z12.h - z15.h} // 11000000-01000100-10100101-10000011 +// CHECK-INST: mov za1v.h[w13, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0xa5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044a583 + +// Aliases + +mov za0v.h[w12, 0:3], {z0.h - z3.h} // 11000000-01000100-10000100-00000000 +// CHECK-INST: mov za0v.h[w12, 0:3], { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x84,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448400 + +mov za0v.h[w14, 4:7], {z8.h - z11.h} // 11000000-01000100-11000101-00000001 +// CHECK-INST: mov za0v.h[w14, 4:7], { z8.h - z11.h } +// CHECK-ENCODING: [0x01,0xc5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c501 + +mov za1v.h[w15, 4:7], {z12.h - z15.h} // 11000000-01000100-11100101-10000011 +// CHECK-INST: mov za1v.h[w15, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0xe5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e583 + +mov za1v.h[w15, 4:7], {z28.h - z31.h} // 11000000-01000100-11100111-10000011 +// CHECK-INST: mov za1v.h[w15, 4:7], { z28.h - z31.h } +// CHECK-ENCODING: [0x83,0xe7,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e783 + +mov za0v.h[w12, 4:7], {z16.h - 
z19.h} // 11000000-01000100-10000110-00000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x86,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448601 + +mov za0v.h[w12, 4:7], {z0.h - z3.h} // 11000000-01000100-10000100-00000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0x84,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448401 + +mov za0v.h[w14, 0:3], {z16.h - z19.h} // 11000000-01000100-11000110-00000000 +// CHECK-INST: mov za0v.h[w14, 0:3], { z16.h - z19.h } +// CHECK-ENCODING: [0x00,0xc6,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c600 + +mov za0v.h[w12, 0:3], {z12.h - z15.h} // 11000000-01000100-10000101-10000000 +// CHECK-INST: mov za0v.h[w12, 0:3], { z12.h - z15.h } +// CHECK-ENCODING: [0x80,0x85,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448580 + +mov za0v.h[w14, 4:7], {z0.h - z3.h} // 11000000-01000100-11000100-00000001 +// CHECK-INST: mov za0v.h[w14, 4:7], { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0xc4,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044c401 + +mov za0v.h[w12, 4:7], {z20.h - z23.h} // 11000000-01000100-10000110-10000001 +// CHECK-INST: mov za0v.h[w12, 4:7], { z20.h - z23.h } +// CHECK-ENCODING: [0x81,0x86,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0448681 + +mov za1v.h[w15, 0:3], {z8.h - z11.h} // 11000000-01000100-11100101-00000010 +// CHECK-INST: mov za1v.h[w15, 0:3], { z8.h - z11.h } +// CHECK-ENCODING: [0x02,0xe5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044e502 + +mov za1v.h[w13, 4:7], {z12.h - z15.h} // 11000000-01000100-10100101-10000011 +// CHECK-INST: mov za1v.h[w13, 4:7], { z12.h - z15.h } +// CHECK-ENCODING: [0x83,0xa5,0x44,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c044a583 + + +mova {z0.s - z3.s}, za0h.s[w12, 0:3] 
// 11000000-10000110-00000100-00000000 +// CHECK-INST: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860400 + +mova {z20.s - z23.s}, za2h.s[w14, 0:3] // 11000000-10000110-01000100-01010100 +// CHECK-INST: mov { z20.s - z23.s }, za2h.s[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864454 + +mova {z20.s - z23.s}, za1h.s[w15, 0:3] // 11000000-10000110-01100100-00110100 +// CHECK-INST: mov { z20.s - z23.s }, za1h.s[w15, 0:3] +// CHECK-ENCODING: [0x34,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866434 + +mova {z28.s - z31.s}, za3h.s[w15, 0:3] // 11000000-10000110-01100100-01111100 +// CHECK-INST: mov { z28.s - z31.s }, za3h.s[w15, 0:3] +// CHECK-ENCODING: [0x7c,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086647c + +mova {z4.s - z7.s}, za1h.s[w12, 0:3] // 11000000-10000110-00000100-00100100 +// CHECK-INST: mov { z4.s - z7.s }, za1h.s[w12, 0:3] +// CHECK-ENCODING: [0x24,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860424 + +mova {z0.s - z3.s}, za1h.s[w12, 0:3] // 11000000-10000110-00000100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1h.s[w12, 0:3] +// CHECK-ENCODING: [0x20,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860420 + +mova {z24.s - z27.s}, za3h.s[w14, 0:3] // 11000000-10000110-01000100-01111000 +// CHECK-INST: mov { z24.s - z27.s }, za3h.s[w14, 0:3] +// CHECK-ENCODING: [0x78,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864478 + +mova {z16.s - z19.s}, za1h.s[w14, 0:3] // 11000000-10000110-01000100-00110000 +// CHECK-INST: mov { z16.s - z19.s }, za1h.s[w14, 0:3] +// CHECK-ENCODING: [0x30,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864430 + +mova {z28.s - z31.s}, za2h.s[w12, 0:3] 
// 11000000-10000110-00000100-01011100 +// CHECK-INST: mov { z28.s - z31.s }, za2h.s[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086045c + +mova {z0.s - z3.s}, za1h.s[w15, 0:3] // 11000000-10000110-01100100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1h.s[w15, 0:3] +// CHECK-ENCODING: [0x20,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866420 + +mova {z4.s - z7.s}, za0h.s[w13, 0:3] // 11000000-10000110-00100100-00000100 +// CHECK-INST: mov { z4.s - z7.s }, za0h.s[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0862404 + +// Aliases + +mov {z0.s - z3.s}, za0h.s[w12, 0:3] // 11000000-10000110-00000100-00000000 +// CHECK-INST: mov { z0.s - z3.s }, za0h.s[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860400 + +mov {z20.s - z23.s}, za2h.s[w14, 0:3] // 11000000-10000110-01000100-01010100 +// CHECK-INST: mov { z20.s - z23.s }, za2h.s[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864454 + +mov {z20.s - z23.s}, za1h.s[w15, 0:3] // 11000000-10000110-01100100-00110100 +// CHECK-INST: mov { z20.s - z23.s }, za1h.s[w15, 0:3] +// CHECK-ENCODING: [0x34,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866434 + +mov {z28.s - z31.s}, za3h.s[w15, 0:3] // 11000000-10000110-01100100-01111100 +// CHECK-INST: mov { z28.s - z31.s }, za3h.s[w15, 0:3] +// CHECK-ENCODING: [0x7c,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086647c + +mov {z4.s - z7.s}, za1h.s[w12, 0:3] // 11000000-10000110-00000100-00100100 +// CHECK-INST: mov { z4.s - z7.s }, za1h.s[w12, 0:3] +// CHECK-ENCODING: [0x24,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860424 + +mov {z0.s - z3.s}, za1h.s[w12, 0:3] 
// 11000000-10000110-00000100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1h.s[w12, 0:3] +// CHECK-ENCODING: [0x20,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0860420 + +mov {z24.s - z27.s}, za3h.s[w14, 0:3] // 11000000-10000110-01000100-01111000 +// CHECK-INST: mov { z24.s - z27.s }, za3h.s[w14, 0:3] +// CHECK-ENCODING: [0x78,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864478 + +mov {z16.s - z19.s}, za1h.s[w14, 0:3] // 11000000-10000110-01000100-00110000 +// CHECK-INST: mov { z16.s - z19.s }, za1h.s[w14, 0:3] +// CHECK-ENCODING: [0x30,0x44,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0864430 + +mov {z28.s - z31.s}, za2h.s[w12, 0:3] // 11000000-10000110-00000100-01011100 +// CHECK-INST: mov { z28.s - z31.s }, za2h.s[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x04,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086045c + +mov {z0.s - z3.s}, za1h.s[w15, 0:3] // 11000000-10000110-01100100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1h.s[w15, 0:3] +// CHECK-ENCODING: [0x20,0x64,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0866420 + +mov {z4.s - z7.s}, za0h.s[w13, 0:3] // 11000000-10000110-00100100-00000100 +// CHECK-INST: mov { z4.s - z7.s }, za0h.s[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0862404 + + +mova {z0.s - z3.s}, za0v.s[w12, 0:3] // 11000000-10000110-10000100-00000000 +// CHECK-INST: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868400 + +mova {z20.s - z23.s}, za2v.s[w14, 0:3] // 11000000-10000110-11000100-01010100 +// CHECK-INST: mov { z20.s - z23.s }, za2v.s[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c454 + +mova {z20.s - z23.s}, za1v.s[w15, 0:3] // 
11000000-10000110-11100100-00110100 +// CHECK-INST: mov { z20.s - z23.s }, za1v.s[w15, 0:3] +// CHECK-ENCODING: [0x34,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e434 + +mova {z28.s - z31.s}, za3v.s[w15, 0:3] // 11000000-10000110-11100100-01111100 +// CHECK-INST: mov { z28.s - z31.s }, za3v.s[w15, 0:3] +// CHECK-ENCODING: [0x7c,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e47c + +mova {z4.s - z7.s}, za1v.s[w12, 0:3] // 11000000-10000110-10000100-00100100 +// CHECK-INST: mov { z4.s - z7.s }, za1v.s[w12, 0:3] +// CHECK-ENCODING: [0x24,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868424 + +mova {z0.s - z3.s}, za1v.s[w12, 0:3] // 11000000-10000110-10000100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1v.s[w12, 0:3] +// CHECK-ENCODING: [0x20,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868420 + +mova {z24.s - z27.s}, za3v.s[w14, 0:3] // 11000000-10000110-11000100-01111000 +// CHECK-INST: mov { z24.s - z27.s }, za3v.s[w14, 0:3] +// CHECK-ENCODING: [0x78,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c478 + +mova {z16.s - z19.s}, za1v.s[w14, 0:3] // 11000000-10000110-11000100-00110000 +// CHECK-INST: mov { z16.s - z19.s }, za1v.s[w14, 0:3] +// CHECK-ENCODING: [0x30,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c430 + +mova {z28.s - z31.s}, za2v.s[w12, 0:3] // 11000000-10000110-10000100-01011100 +// CHECK-INST: mov { z28.s - z31.s }, za2v.s[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086845c + +mova {z0.s - z3.s}, za1v.s[w15, 0:3] // 11000000-10000110-11100100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1v.s[w15, 0:3] +// CHECK-ENCODING: [0x20,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e420 + +mova {z4.s - z7.s}, za0v.s[w13, 0:3] // 
11000000-10000110-10100100-00000100 +// CHECK-INST: mov { z4.s - z7.s }, za0v.s[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086a404 + +// Aliases + +mov {z0.s - z3.s}, za0v.s[w12, 0:3] // 11000000-10000110-10000100-00000000 +// CHECK-INST: mov { z0.s - z3.s }, za0v.s[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868400 + +mov {z20.s - z23.s}, za2v.s[w14, 0:3] // 11000000-10000110-11000100-01010100 +// CHECK-INST: mov { z20.s - z23.s }, za2v.s[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c454 + +mov {z20.s - z23.s}, za1v.s[w15, 0:3] // 11000000-10000110-11100100-00110100 +// CHECK-INST: mov { z20.s - z23.s }, za1v.s[w15, 0:3] +// CHECK-ENCODING: [0x34,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e434 + +mov {z28.s - z31.s}, za3v.s[w15, 0:3] // 11000000-10000110-11100100-01111100 +// CHECK-INST: mov { z28.s - z31.s }, za3v.s[w15, 0:3] +// CHECK-ENCODING: [0x7c,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e47c + +mov {z4.s - z7.s}, za1v.s[w12, 0:3] // 11000000-10000110-10000100-00100100 +// CHECK-INST: mov { z4.s - z7.s }, za1v.s[w12, 0:3] +// CHECK-ENCODING: [0x24,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868424 + +mov {z0.s - z3.s}, za1v.s[w12, 0:3] // 11000000-10000110-10000100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1v.s[w12, 0:3] +// CHECK-ENCODING: [0x20,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0868420 + +mov {z24.s - z27.s}, za3v.s[w14, 0:3] // 11000000-10000110-11000100-01111000 +// CHECK-INST: mov { z24.s - z27.s }, za3v.s[w14, 0:3] +// CHECK-ENCODING: [0x78,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c478 + +mov {z16.s - z19.s}, za1v.s[w14, 0:3] // 
11000000-10000110-11000100-00110000 +// CHECK-INST: mov { z16.s - z19.s }, za1v.s[w14, 0:3] +// CHECK-ENCODING: [0x30,0xc4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086c430 + +mov {z28.s - z31.s}, za2v.s[w12, 0:3] // 11000000-10000110-10000100-01011100 +// CHECK-INST: mov { z28.s - z31.s }, za2v.s[w12, 0:3] +// CHECK-ENCODING: [0x5c,0x84,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086845c + +mov {z0.s - z3.s}, za1v.s[w15, 0:3] // 11000000-10000110-11100100-00100000 +// CHECK-INST: mov { z0.s - z3.s }, za1v.s[w15, 0:3] +// CHECK-ENCODING: [0x20,0xe4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086e420 + +mov {z4.s - z7.s}, za0v.s[w13, 0:3] // 11000000-10000110-10100100-00000100 +// CHECK-INST: mov { z4.s - z7.s }, za0v.s[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x86,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c086a404 + + +mova za0h.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-00000100-00000000 +// CHECK-INST: mov za0h.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0x04,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840400 + +mova za1h.s[w14, 0:3], {z8.s - z11.s} // 11000000-10000100-01000101-00000001 +// CHECK-INST: mov za1h.s[w14, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x01,0x45,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844501 + +mova za3h.s[w15, 0:3], {z12.s - z15.s} // 11000000-10000100-01100101-10000011 +// CHECK-INST: mov za3h.s[w15, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0x65,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846583 + +mova za3h.s[w15, 0:3], {z28.s - z31.s} // 11000000-10000100-01100111-10000011 +// CHECK-INST: mov za3h.s[w15, 0:3], { z28.s - z31.s } +// CHECK-ENCODING: [0x83,0x67,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846783 + +mova za1h.s[w12, 0:3], {z16.s - z19.s} // 
11000000-10000100-00000110-00000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x01,0x06,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840601 + +mova za1h.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-00000100-00000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x04,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840401 + +mova za0h.s[w14, 0:3], {z16.s - z19.s} // 11000000-10000100-01000110-00000000 +// CHECK-INST: mov za0h.s[w14, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x00,0x46,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844600 + +mova za0h.s[w12, 0:3], {z12.s - z15.s} // 11000000-10000100-00000101-10000000 +// CHECK-INST: mov za0h.s[w12, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x80,0x05,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840580 + +mova za1h.s[w14, 0:3], {z0.s - z3.s} // 11000000-10000100-01000100-00000001 +// CHECK-INST: mov za1h.s[w14, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x44,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844401 + +mova za1h.s[w12, 0:3], {z20.s - z23.s} // 11000000-10000100-00000110-10000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z20.s - z23.s } +// CHECK-ENCODING: [0x81,0x06,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840681 + +mova za2h.s[w15, 0:3], {z8.s - z11.s} // 11000000-10000100-01100101-00000010 +// CHECK-INST: mov za2h.s[w15, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x02,0x65,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846502 + +mova za3h.s[w13, 0:3], {z12.s - z15.s} // 11000000-10000100-00100101-10000011 +// CHECK-INST: mov za3h.s[w13, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0x25,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0842583 + +// Aliases + +mov za0h.s[w12, 0:3], {z0.s - 
z3.s} // 11000000-10000100-00000100-00000000 +// CHECK-INST: mov za0h.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0x04,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840400 + +mov za1h.s[w14, 0:3], {z8.s - z11.s} // 11000000-10000100-01000101-00000001 +// CHECK-INST: mov za1h.s[w14, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x01,0x45,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844501 + +mov za3h.s[w15, 0:3], {z12.s - z15.s} // 11000000-10000100-01100101-10000011 +// CHECK-INST: mov za3h.s[w15, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0x65,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846583 + +mov za3h.s[w15, 0:3], {z28.s - z31.s} // 11000000-10000100-01100111-10000011 +// CHECK-INST: mov za3h.s[w15, 0:3], { z28.s - z31.s } +// CHECK-ENCODING: [0x83,0x67,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846783 + +mov za1h.s[w12, 0:3], {z16.s - z19.s} // 11000000-10000100-00000110-00000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x01,0x06,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840601 + +mov za1h.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-00000100-00000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x04,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840401 + +mov za0h.s[w14, 0:3], {z16.s - z19.s} // 11000000-10000100-01000110-00000000 +// CHECK-INST: mov za0h.s[w14, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x00,0x46,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844600 + +mov za0h.s[w12, 0:3], {z12.s - z15.s} // 11000000-10000100-00000101-10000000 +// CHECK-INST: mov za0h.s[w12, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x80,0x05,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840580 + +mov za1h.s[w14, 0:3], {z0.s - z3.s} // 
11000000-10000100-01000100-00000001 +// CHECK-INST: mov za1h.s[w14, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x44,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0844401 + +mov za1h.s[w12, 0:3], {z20.s - z23.s} // 11000000-10000100-00000110-10000001 +// CHECK-INST: mov za1h.s[w12, 0:3], { z20.s - z23.s } +// CHECK-ENCODING: [0x81,0x06,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0840681 + +mov za2h.s[w15, 0:3], {z8.s - z11.s} // 11000000-10000100-01100101-00000010 +// CHECK-INST: mov za2h.s[w15, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x02,0x65,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0846502 + +mov za3h.s[w13, 0:3], {z12.s - z15.s} // 11000000-10000100-00100101-10000011 +// CHECK-INST: mov za3h.s[w13, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0x25,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0842583 + + +mova za0v.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-10000100-00000000 +// CHECK-INST: mov za0v.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0x84,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848400 + +mova za1v.s[w14, 0:3], {z8.s - z11.s} // 11000000-10000100-11000101-00000001 +// CHECK-INST: mov za1v.s[w14, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x01,0xc5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c501 + +mova za3v.s[w15, 0:3], {z12.s - z15.s} // 11000000-10000100-11100101-10000011 +// CHECK-INST: mov za3v.s[w15, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0xe5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e583 + +mova za3v.s[w15, 0:3], {z28.s - z31.s} // 11000000-10000100-11100111-10000011 +// CHECK-INST: mov za3v.s[w15, 0:3], { z28.s - z31.s } +// CHECK-ENCODING: [0x83,0xe7,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e783 + +mova za1v.s[w12, 0:3], {z16.s - z19.s} // 
11000000-10000100-10000110-00000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x01,0x86,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848601 + +mova za1v.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-10000100-00000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x84,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848401 + +mova za0v.s[w14, 0:3], {z16.s - z19.s} // 11000000-10000100-11000110-00000000 +// CHECK-INST: mov za0v.s[w14, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x00,0xc6,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c600 + +mova za0v.s[w12, 0:3], {z12.s - z15.s} // 11000000-10000100-10000101-10000000 +// CHECK-INST: mov za0v.s[w12, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x80,0x85,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848580 + +mova za1v.s[w14, 0:3], {z0.s - z3.s} // 11000000-10000100-11000100-00000001 +// CHECK-INST: mov za1v.s[w14, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0xc4,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c401 + +mova za1v.s[w12, 0:3], {z20.s - z23.s} // 11000000-10000100-10000110-10000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z20.s - z23.s } +// CHECK-ENCODING: [0x81,0x86,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848681 + +mova za2v.s[w15, 0:3], {z8.s - z11.s} // 11000000-10000100-11100101-00000010 +// CHECK-INST: mov za2v.s[w15, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x02,0xe5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e502 + +mova za3v.s[w13, 0:3], {z12.s - z15.s} // 11000000-10000100-10100101-10000011 +// CHECK-INST: mov za3v.s[w13, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0xa5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084a583 + +// Aliases + +mov za0v.s[w12, 0:3], {z0.s - 
z3.s} // 11000000-10000100-10000100-00000000 +// CHECK-INST: mov za0v.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0x84,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848400 + +mov za1v.s[w14, 0:3], {z8.s - z11.s} // 11000000-10000100-11000101-00000001 +// CHECK-INST: mov za1v.s[w14, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x01,0xc5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c501 + +mov za3v.s[w15, 0:3], {z12.s - z15.s} // 11000000-10000100-11100101-10000011 +// CHECK-INST: mov za3v.s[w15, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0xe5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e583 + +mov za3v.s[w15, 0:3], {z28.s - z31.s} // 11000000-10000100-11100111-10000011 +// CHECK-INST: mov za3v.s[w15, 0:3], { z28.s - z31.s } +// CHECK-ENCODING: [0x83,0xe7,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e783 + +mov za1v.s[w12, 0:3], {z16.s - z19.s} // 11000000-10000100-10000110-00000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x01,0x86,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848601 + +mov za1v.s[w12, 0:3], {z0.s - z3.s} // 11000000-10000100-10000100-00000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0x84,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848401 + +mov za0v.s[w14, 0:3], {z16.s - z19.s} // 11000000-10000100-11000110-00000000 +// CHECK-INST: mov za0v.s[w14, 0:3], { z16.s - z19.s } +// CHECK-ENCODING: [0x00,0xc6,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c600 + +mov za0v.s[w12, 0:3], {z12.s - z15.s} // 11000000-10000100-10000101-10000000 +// CHECK-INST: mov za0v.s[w12, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x80,0x85,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848580 + +mov za1v.s[w14, 0:3], {z0.s - z3.s} // 
11000000-10000100-11000100-00000001 +// CHECK-INST: mov za1v.s[w14, 0:3], { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0xc4,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084c401 + +mov za1v.s[w12, 0:3], {z20.s - z23.s} // 11000000-10000100-10000110-10000001 +// CHECK-INST: mov za1v.s[w12, 0:3], { z20.s - z23.s } +// CHECK-ENCODING: [0x81,0x86,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0848681 + +mov za2v.s[w15, 0:3], {z8.s - z11.s} // 11000000-10000100-11100101-00000010 +// CHECK-INST: mov za2v.s[w15, 0:3], { z8.s - z11.s } +// CHECK-ENCODING: [0x02,0xe5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084e502 + +mov za3v.s[w13, 0:3], {z12.s - z15.s} // 11000000-10000100-10100101-10000011 +// CHECK-INST: mov za3v.s[w13, 0:3], { z12.s - z15.s } +// CHECK-ENCODING: [0x83,0xa5,0x84,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c084a583 + + +mova {z0.d - z3.d}, za0h.d[w12, 0:3] // 11000000-11000110-00000100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60400 + +mova {z20.d - z23.d}, za2h.d[w14, 0:3] // 11000000-11000110-01000100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za2h.d[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64454 + +mova {z20.d - z23.d}, za5h.d[w15, 0:3] // 11000000-11000110-01100100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za5h.d[w15, 0:3] +// CHECK-ENCODING: [0xb4,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c664b4 + +mova {z28.d - z31.d}, za7h.d[w15, 0:3] // 11000000-11000110-01100100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za7h.d[w15, 0:3] +// CHECK-ENCODING: [0xfc,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c664fc + +mova {z4.d - z7.d}, za1h.d[w12, 0:3] // 
11000000-11000110-00000100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za1h.d[w12, 0:3] +// CHECK-ENCODING: [0x24,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60424 + +mova {z0.d - z3.d}, za1h.d[w12, 0:3] // 11000000-11000110-00000100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1h.d[w12, 0:3] +// CHECK-ENCODING: [0x20,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60420 + +mova {z24.d - z27.d}, za3h.d[w14, 0:3] // 11000000-11000110-01000100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za3h.d[w14, 0:3] +// CHECK-ENCODING: [0x78,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64478 + +mova {z0.d - z3.d}, za4h.d[w12, 0:3] // 11000000-11000110-00000100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za4h.d[w12, 0:3] +// CHECK-ENCODING: [0x80,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60480 + +mova {z16.d - z19.d}, za1h.d[w14, 0:3] // 11000000-11000110-01000100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za1h.d[w14, 0:3] +// CHECK-ENCODING: [0x30,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64430 + +mova {z28.d - z31.d}, za6h.d[w12, 0:3] // 11000000-11000110-00000100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za6h.d[w12, 0:3] +// CHECK-ENCODING: [0xdc,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c604dc + +mova {z0.d - z3.d}, za1h.d[w15, 0:3] // 11000000-11000110-01100100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1h.d[w15, 0:3] +// CHECK-ENCODING: [0x20,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c66420 + +mova {z4.d - z7.d}, za4h.d[w13, 0:3] // 11000000-11000110-00100100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za4h.d[w13, 0:3] +// CHECK-ENCODING: [0x84,0x24,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c62484 + +// Aliases + +mov {z0.d - z3.d}, za0h.d[w12, 0:3] 
// 11000000-11000110-00000100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za0h.d[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60400 + +mov {z20.d - z23.d}, za2h.d[w14, 0:3] // 11000000-11000110-01000100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za2h.d[w14, 0:3] +// CHECK-ENCODING: [0x54,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64454 + +mov {z20.d - z23.d}, za5h.d[w15, 0:3] // 11000000-11000110-01100100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za5h.d[w15, 0:3] +// CHECK-ENCODING: [0xb4,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c664b4 + +mov {z28.d - z31.d}, za7h.d[w15, 0:3] // 11000000-11000110-01100100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za7h.d[w15, 0:3] +// CHECK-ENCODING: [0xfc,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c664fc + +mov {z4.d - z7.d}, za1h.d[w12, 0:3] // 11000000-11000110-00000100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za1h.d[w12, 0:3] +// CHECK-ENCODING: [0x24,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60424 + +mov {z0.d - z3.d}, za1h.d[w12, 0:3] // 11000000-11000110-00000100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1h.d[w12, 0:3] +// CHECK-ENCODING: [0x20,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60420 + +mov {z24.d - z27.d}, za3h.d[w14, 0:3] // 11000000-11000110-01000100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za3h.d[w14, 0:3] +// CHECK-ENCODING: [0x78,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64478 + +mov {z0.d - z3.d}, za4h.d[w12, 0:3] // 11000000-11000110-00000100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za4h.d[w12, 0:3] +// CHECK-ENCODING: [0x80,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c60480 + +mov {z16.d - z19.d}, za1h.d[w14, 0:3] // 
11000000-11000110-01000100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za1h.d[w14, 0:3] +// CHECK-ENCODING: [0x30,0x44,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c64430 + +mov {z28.d - z31.d}, za6h.d[w12, 0:3] // 11000000-11000110-00000100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za6h.d[w12, 0:3] +// CHECK-ENCODING: [0xdc,0x04,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c604dc + +mov {z0.d - z3.d}, za1h.d[w15, 0:3] // 11000000-11000110-01100100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1h.d[w15, 0:3] +// CHECK-ENCODING: [0x20,0x64,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c66420 + +mov {z4.d - z7.d}, za4h.d[w13, 0:3] // 11000000-11000110-00100100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za4h.d[w13, 0:3] +// CHECK-ENCODING: [0x84,0x24,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c62484 + + +mova {z0.d - z3.d}, za0v.d[w12, 0:3] // 11000000-11000110-10000100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68400 + +mova {z20.d - z23.d}, za2v.d[w14, 0:3] // 11000000-11000110-11000100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za2v.d[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c454 + +mova {z20.d - z23.d}, za5v.d[w15, 0:3] // 11000000-11000110-11100100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za5v.d[w15, 0:3] +// CHECK-ENCODING: [0xb4,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e4b4 + +mova {z28.d - z31.d}, za7v.d[w15, 0:3] // 11000000-11000110-11100100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za7v.d[w15, 0:3] +// CHECK-ENCODING: [0xfc,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e4fc + +mova {z4.d - z7.d}, za1v.d[w12, 0:3] // 
11000000-11000110-10000100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za1v.d[w12, 0:3] +// CHECK-ENCODING: [0x24,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68424 + +mova {z0.d - z3.d}, za1v.d[w12, 0:3] // 11000000-11000110-10000100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1v.d[w12, 0:3] +// CHECK-ENCODING: [0x20,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68420 + +mova {z24.d - z27.d}, za3v.d[w14, 0:3] // 11000000-11000110-11000100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za3v.d[w14, 0:3] +// CHECK-ENCODING: [0x78,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c478 + +mova {z0.d - z3.d}, za4v.d[w12, 0:3] // 11000000-11000110-10000100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za4v.d[w12, 0:3] +// CHECK-ENCODING: [0x80,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68480 + +mova {z16.d - z19.d}, za1v.d[w14, 0:3] // 11000000-11000110-11000100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za1v.d[w14, 0:3] +// CHECK-ENCODING: [0x30,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c430 + +mova {z28.d - z31.d}, za6v.d[w12, 0:3] // 11000000-11000110-10000100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za6v.d[w12, 0:3] +// CHECK-ENCODING: [0xdc,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c684dc + +mova {z0.d - z3.d}, za1v.d[w15, 0:3] // 11000000-11000110-11100100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1v.d[w15, 0:3] +// CHECK-ENCODING: [0x20,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e420 + +mova {z4.d - z7.d}, za4v.d[w13, 0:3] // 11000000-11000110-10100100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za4v.d[w13, 0:3] +// CHECK-ENCODING: [0x84,0xa4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6a484 + +// Aliases + +mov {z0.d - z3.d}, za0v.d[w12, 0:3] 
// 11000000-11000110-10000100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za0v.d[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68400 + +mov {z20.d - z23.d}, za2v.d[w14, 0:3] // 11000000-11000110-11000100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za2v.d[w14, 0:3] +// CHECK-ENCODING: [0x54,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c454 + +mov {z20.d - z23.d}, za5v.d[w15, 0:3] // 11000000-11000110-11100100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za5v.d[w15, 0:3] +// CHECK-ENCODING: [0xb4,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e4b4 + +mov {z28.d - z31.d}, za7v.d[w15, 0:3] // 11000000-11000110-11100100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za7v.d[w15, 0:3] +// CHECK-ENCODING: [0xfc,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e4fc + +mov {z4.d - z7.d}, za1v.d[w12, 0:3] // 11000000-11000110-10000100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za1v.d[w12, 0:3] +// CHECK-ENCODING: [0x24,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68424 + +mov {z0.d - z3.d}, za1v.d[w12, 0:3] // 11000000-11000110-10000100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1v.d[w12, 0:3] +// CHECK-ENCODING: [0x20,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68420 + +mov {z24.d - z27.d}, za3v.d[w14, 0:3] // 11000000-11000110-11000100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za3v.d[w14, 0:3] +// CHECK-ENCODING: [0x78,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c478 + +mov {z0.d - z3.d}, za4v.d[w12, 0:3] // 11000000-11000110-10000100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za4v.d[w12, 0:3] +// CHECK-ENCODING: [0x80,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c68480 + +mov {z16.d - z19.d}, za1v.d[w14, 0:3] // 
11000000-11000110-11000100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za1v.d[w14, 0:3] +// CHECK-ENCODING: [0x30,0xc4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6c430 + +mov {z28.d - z31.d}, za6v.d[w12, 0:3] // 11000000-11000110-10000100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za6v.d[w12, 0:3] +// CHECK-ENCODING: [0xdc,0x84,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c684dc + +mov {z0.d - z3.d}, za1v.d[w15, 0:3] // 11000000-11000110-11100100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za1v.d[w15, 0:3] +// CHECK-ENCODING: [0x20,0xe4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6e420 + +mov {z4.d - z7.d}, za4v.d[w13, 0:3] // 11000000-11000110-10100100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za4v.d[w13, 0:3] +// CHECK-ENCODING: [0x84,0xa4,0xc6,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c6a484 + + +mova {z0.d - z3.d}, za.d[w8, 0, vgx4] // 11000000-00000110-00001100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +// CHECK-ENCODING: [0x00,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c00 + +mova {z0.d - z3.d}, za.d[w8, 0] // 11000000-00000110-00001100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +// CHECK-ENCODING: [0x00,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c00 + +mova {z20.d - z23.d}, za.d[w10, 2, vgx4] // 11000000-00000110-01001100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w10, 2, vgx4] +// CHECK-ENCODING: [0x54,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c54 + +mova {z20.d - z23.d}, za.d[w10, 2] // 11000000-00000110-01001100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w10, 2, vgx4] +// CHECK-ENCODING: [0x54,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c54 + +mova {z20.d - z23.d}, za.d[w11, 5, vgx4] // 
11000000-00000110-01101100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w11, 5, vgx4] +// CHECK-ENCODING: [0xb4,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cb4 + +mova {z20.d - z23.d}, za.d[w11, 5] // 11000000-00000110-01101100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w11, 5, vgx4] +// CHECK-ENCODING: [0xb4,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cb4 + +mova {z28.d - z31.d}, za.d[w11, 7, vgx4] // 11000000-00000110-01101100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w11, 7, vgx4] +// CHECK-ENCODING: [0xfc,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cfc + +mova {z28.d - z31.d}, za.d[w11, 7] // 11000000-00000110-01101100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w11, 7, vgx4] +// CHECK-ENCODING: [0xfc,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cfc + +mova {z4.d - z7.d}, za.d[w8, 1, vgx4] // 11000000-00000110-00001100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x24,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c24 + +mova {z4.d - z7.d}, za.d[w8, 1] // 11000000-00000110-00001100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x24,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c24 + +mova {z0.d - z3.d}, za.d[w8, 1, vgx4] // 11000000-00000110-00001100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x20,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c20 + +mova {z0.d - z3.d}, za.d[w8, 1] // 11000000-00000110-00001100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x20,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c20 + +mova {z24.d - z27.d}, za.d[w10, 3, vgx4] // 
11000000-00000110-01001100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za.d[w10, 3, vgx4] +// CHECK-ENCODING: [0x78,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c78 + +mova {z24.d - z27.d}, za.d[w10, 3] // 11000000-00000110-01001100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za.d[w10, 3, vgx4] +// CHECK-ENCODING: [0x78,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c78 + +mova {z0.d - z3.d}, za.d[w8, 4, vgx4] // 11000000-00000110-00001100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 4, vgx4] +// CHECK-ENCODING: [0x80,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c80 + +mova {z0.d - z3.d}, za.d[w8, 4] // 11000000-00000110-00001100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 4, vgx4] +// CHECK-ENCODING: [0x80,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c80 + +mova {z16.d - z19.d}, za.d[w10, 1, vgx4] // 11000000-00000110-01001100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za.d[w10, 1, vgx4] +// CHECK-ENCODING: [0x30,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c30 + +mova {z16.d - z19.d}, za.d[w10, 1] // 11000000-00000110-01001100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za.d[w10, 1, vgx4] +// CHECK-ENCODING: [0x30,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c30 + +mova {z28.d - z31.d}, za.d[w8, 6, vgx4] // 11000000-00000110-00001100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w8, 6, vgx4] +// CHECK-ENCODING: [0xdc,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060cdc + +mova {z28.d - z31.d}, za.d[w8, 6] // 11000000-00000110-00001100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w8, 6, vgx4] +// CHECK-ENCODING: [0xdc,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060cdc + +mova {z0.d - z3.d}, za.d[w11, 1, vgx4] // 
11000000-00000110-01101100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w11, 1, vgx4] +// CHECK-ENCODING: [0x20,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066c20 + +mova {z0.d - z3.d}, za.d[w11, 1] // 11000000-00000110-01101100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w11, 1, vgx4] +// CHECK-ENCODING: [0x20,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066c20 + +mova {z4.d - z7.d}, za.d[w9, 4, vgx4] // 11000000-00000110-00101100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w9, 4, vgx4] +// CHECK-ENCODING: [0x84,0x2c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062c84 + +mova {z4.d - z7.d}, za.d[w9, 4] // 11000000-00000110-00101100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w9, 4, vgx4] +// CHECK-ENCODING: [0x84,0x2c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062c84 + +// Aliases + +mov {z0.d - z3.d}, za.d[w8, 0, vgx4] // 11000000-00000110-00001100-00000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 0, vgx4] +// CHECK-ENCODING: [0x00,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c00 + +mov {z20.d - z23.d}, za.d[w10, 2, vgx4] // 11000000-00000110-01001100-01010100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w10, 2, vgx4] +// CHECK-ENCODING: [0x54,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c54 + +mov {z20.d - z23.d}, za.d[w11, 5, vgx4] // 11000000-00000110-01101100-10110100 +// CHECK-INST: mov { z20.d - z23.d }, za.d[w11, 5, vgx4] +// CHECK-ENCODING: [0xb4,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cb4 + +mov {z28.d - z31.d}, za.d[w11, 7, vgx4] // 11000000-00000110-01101100-11111100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w11, 7, vgx4] +// CHECK-ENCODING: [0xfc,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066cfc + +mov {z4.d - z7.d}, za.d[w8, 
1, vgx4] // 11000000-00000110-00001100-00100100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x24,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c24 + +mov {z0.d - z3.d}, za.d[w8, 1, vgx4] // 11000000-00000110-00001100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 1, vgx4] +// CHECK-ENCODING: [0x20,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c20 + +mov {z24.d - z27.d}, za.d[w10, 3, vgx4] // 11000000-00000110-01001100-01111000 +// CHECK-INST: mov { z24.d - z27.d }, za.d[w10, 3, vgx4] +// CHECK-ENCODING: [0x78,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c78 + +mov {z0.d - z3.d}, za.d[w8, 4, vgx4] // 11000000-00000110-00001100-10000000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w8, 4, vgx4] +// CHECK-ENCODING: [0x80,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060c80 + +mov {z16.d - z19.d}, za.d[w10, 1, vgx4] // 11000000-00000110-01001100-00110000 +// CHECK-INST: mov { z16.d - z19.d }, za.d[w10, 1, vgx4] +// CHECK-ENCODING: [0x30,0x4c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064c30 + +mov {z28.d - z31.d}, za.d[w8, 6, vgx4] // 11000000-00000110-00001100-11011100 +// CHECK-INST: mov { z28.d - z31.d }, za.d[w8, 6, vgx4] +// CHECK-ENCODING: [0xdc,0x0c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060cdc + +mov {z0.d - z3.d}, za.d[w11, 1, vgx4] // 11000000-00000110-01101100-00100000 +// CHECK-INST: mov { z0.d - z3.d }, za.d[w11, 1, vgx4] +// CHECK-ENCODING: [0x20,0x6c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066c20 + +mov {z4.d - z7.d}, za.d[w9, 4, vgx4] // 11000000-00000110-00101100-10000100 +// CHECK-INST: mov { z4.d - z7.d }, za.d[w9, 4, vgx4] +// CHECK-ENCODING: [0x84,0x2c,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062c84 + + +mova za0h.d[w12, 0:3], 
{z0.d - z3.d} // 11000000-11000100-00000100-00000000 +// CHECK-INST: mov za0h.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x04,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40400 + +mova za5h.d[w14, 0:3], {z8.d - z11.d} // 11000000-11000100-01000101-00000101 +// CHECK-INST: mov za5h.d[w14, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0x45,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44505 + +mova za7h.d[w15, 0:3], {z12.d - z15.d} // 11000000-11000100-01100101-10000111 +// CHECK-INST: mov za7h.d[w15, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x65,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46587 + +mova za7h.d[w15, 0:3], {z28.d - z31.d} // 11000000-11000100-01100111-10000111 +// CHECK-INST: mov za7h.d[w15, 0:3], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0x67,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46787 + +mova za5h.d[w12, 0:3], {z16.d - z19.d} // 11000000-11000100-00000110-00000101 +// CHECK-INST: mov za5h.d[w12, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x06,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40605 + +mova za1h.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-00000100-00000001 +// CHECK-INST: mov za1h.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x04,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40401 + +mova za0h.d[w14, 0:3], {z16.d - z19.d} // 11000000-11000100-01000110-00000000 +// CHECK-INST: mov za0h.d[w14, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0x46,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44600 + +mova za0h.d[w12, 0:3], {z12.d - z15.d} // 11000000-11000100-00000101-10000000 +// CHECK-INST: mov za0h.d[w12, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x05,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40580 + +mova za1h.d[w14, 0:3], 
{z0.d - z3.d} // 11000000-11000100-01000100-00000001 +// CHECK-INST: mov za1h.d[w14, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x44,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44401 + +mova za5h.d[w12, 0:3], {z20.d - z23.d} // 11000000-11000100-00000110-10000101 +// CHECK-INST: mov za5h.d[w12, 0:3], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x06,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40685 + +mova za2h.d[w15, 0:3], {z8.d - z11.d} // 11000000-11000100-01100101-00000010 +// CHECK-INST: mov za2h.d[w15, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0x65,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46502 + +mova za7h.d[w13, 0:3], {z12.d - z15.d} // 11000000-11000100-00100101-10000111 +// CHECK-INST: mov za7h.d[w13, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x25,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c42587 + +// Aliases + +mov za0h.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-00000100-00000000 +// CHECK-INST: mov za0h.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x04,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40400 + +mov za5h.d[w14, 0:3], {z8.d - z11.d} // 11000000-11000100-01000101-00000101 +// CHECK-INST: mov za5h.d[w14, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0x45,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44505 + +mov za7h.d[w15, 0:3], {z12.d - z15.d} // 11000000-11000100-01100101-10000111 +// CHECK-INST: mov za7h.d[w15, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x65,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46587 + +mov za7h.d[w15, 0:3], {z28.d - z31.d} // 11000000-11000100-01100111-10000111 +// CHECK-INST: mov za7h.d[w15, 0:3], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0x67,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46787 + +mov za5h.d[w12, 
0:3], {z16.d - z19.d} // 11000000-11000100-00000110-00000101 +// CHECK-INST: mov za5h.d[w12, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x06,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40605 + +mov za1h.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-00000100-00000001 +// CHECK-INST: mov za1h.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x04,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40401 + +mov za0h.d[w14, 0:3], {z16.d - z19.d} // 11000000-11000100-01000110-00000000 +// CHECK-INST: mov za0h.d[w14, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0x46,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44600 + +mov za0h.d[w12, 0:3], {z12.d - z15.d} // 11000000-11000100-00000101-10000000 +// CHECK-INST: mov za0h.d[w12, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x05,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40580 + +mov za1h.d[w14, 0:3], {z0.d - z3.d} // 11000000-11000100-01000100-00000001 +// CHECK-INST: mov za1h.d[w14, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x44,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c44401 + +mov za5h.d[w12, 0:3], {z20.d - z23.d} // 11000000-11000100-00000110-10000101 +// CHECK-INST: mov za5h.d[w12, 0:3], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x06,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c40685 + +mov za2h.d[w15, 0:3], {z8.d - z11.d} // 11000000-11000100-01100101-00000010 +// CHECK-INST: mov za2h.d[w15, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0x65,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c46502 + +mov za7h.d[w13, 0:3], {z12.d - z15.d} // 11000000-11000100-00100101-10000111 +// CHECK-INST: mov za7h.d[w13, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x25,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c42587 + + +mova za0v.d[w12, 0:3], 
{z0.d - z3.d} // 11000000-11000100-10000100-00000000 +// CHECK-INST: mov za0v.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x84,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48400 + +mova za5v.d[w14, 0:3], {z8.d - z11.d} // 11000000-11000100-11000101-00000101 +// CHECK-INST: mov za5v.d[w14, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0xc5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c505 + +mova za7v.d[w15, 0:3], {z12.d - z15.d} // 11000000-11000100-11100101-10000111 +// CHECK-INST: mov za7v.d[w15, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0xe5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e587 + +mova za7v.d[w15, 0:3], {z28.d - z31.d} // 11000000-11000100-11100111-10000111 +// CHECK-INST: mov za7v.d[w15, 0:3], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0xe7,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e787 + +mova za5v.d[w12, 0:3], {z16.d - z19.d} // 11000000-11000100-10000110-00000101 +// CHECK-INST: mov za5v.d[w12, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x86,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48605 + +mova za1v.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-10000100-00000001 +// CHECK-INST: mov za1v.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x84,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48401 + +mova za0v.d[w14, 0:3], {z16.d - z19.d} // 11000000-11000100-11000110-00000000 +// CHECK-INST: mov za0v.d[w14, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0xc6,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c600 + +mova za0v.d[w12, 0:3], {z12.d - z15.d} // 11000000-11000100-10000101-10000000 +// CHECK-INST: mov za0v.d[w12, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x85,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48580 + +mova za1v.d[w14, 0:3], 
{z0.d - z3.d} // 11000000-11000100-11000100-00000001 +// CHECK-INST: mov za1v.d[w14, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0xc4,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c401 + +mova za5v.d[w12, 0:3], {z20.d - z23.d} // 11000000-11000100-10000110-10000101 +// CHECK-INST: mov za5v.d[w12, 0:3], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x86,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48685 + +mova za2v.d[w15, 0:3], {z8.d - z11.d} // 11000000-11000100-11100101-00000010 +// CHECK-INST: mov za2v.d[w15, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0xe5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e502 + +mova za7v.d[w13, 0:3], {z12.d - z15.d} // 11000000-11000100-10100101-10000111 +// CHECK-INST: mov za7v.d[w13, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0xa5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4a587 + +// Aliases + +mov za0v.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-10000100-00000000 +// CHECK-INST: mov za0v.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x84,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48400 + +mov za5v.d[w14, 0:3], {z8.d - z11.d} // 11000000-11000100-11000101-00000101 +// CHECK-INST: mov za5v.d[w14, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0xc5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c505 + +mov za7v.d[w15, 0:3], {z12.d - z15.d} // 11000000-11000100-11100101-10000111 +// CHECK-INST: mov za7v.d[w15, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0xe5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e587 + +mov za7v.d[w15, 0:3], {z28.d - z31.d} // 11000000-11000100-11100111-10000111 +// CHECK-INST: mov za7v.d[w15, 0:3], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0xe7,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e787 + +mov za5v.d[w12, 
0:3], {z16.d - z19.d} // 11000000-11000100-10000110-00000101 +// CHECK-INST: mov za5v.d[w12, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x86,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48605 + +mov za1v.d[w12, 0:3], {z0.d - z3.d} // 11000000-11000100-10000100-00000001 +// CHECK-INST: mov za1v.d[w12, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x84,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48401 + +mov za0v.d[w14, 0:3], {z16.d - z19.d} // 11000000-11000100-11000110-00000000 +// CHECK-INST: mov za0v.d[w14, 0:3], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0xc6,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c600 + +mov za0v.d[w12, 0:3], {z12.d - z15.d} // 11000000-11000100-10000101-10000000 +// CHECK-INST: mov za0v.d[w12, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x85,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48580 + +mov za1v.d[w14, 0:3], {z0.d - z3.d} // 11000000-11000100-11000100-00000001 +// CHECK-INST: mov za1v.d[w14, 0:3], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0xc4,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4c401 + +mov za5v.d[w12, 0:3], {z20.d - z23.d} // 11000000-11000100-10000110-10000101 +// CHECK-INST: mov za5v.d[w12, 0:3], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x86,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c48685 + +mov za2v.d[w15, 0:3], {z8.d - z11.d} // 11000000-11000100-11100101-00000010 +// CHECK-INST: mov za2v.d[w15, 0:3], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0xe5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4e502 + +mov za7v.d[w13, 0:3], {z12.d - z15.d} // 11000000-11000100-10100101-10000111 +// CHECK-INST: mov za7v.d[w13, 0:3], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0xa5,0xc4,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0c4a587 + + +mova za.d[w8, 0, vgx4], 
{z0.d - z3.d} // 11000000-00000100-00001100-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c00 + +mova za.d[w8, 0], {z0.d - z3.d} // 11000000-00000100-00001100-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c00 + +mova za.d[w10, 5, vgx4], {z8.d - z11.d} // 11000000-00000100-01001101-00000101 +// CHECK-INST: mov za.d[w10, 5, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0x4d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044d05 + +mova za.d[w10, 5], {z8.d - z11.d} // 11000000-00000100-01001101-00000101 +// CHECK-INST: mov za.d[w10, 5, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0x4d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044d05 + +mova za.d[w11, 7, vgx4], {z12.d - z15.d} // 11000000-00000100-01101101-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d87 + +mova za.d[w11, 7], {z12.d - z15.d} // 11000000-00000100-01101101-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d87 + +mova za.d[w11, 7, vgx4], {z28.d - z31.d} // 11000000-00000100-01101111-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0x6f,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046f87 + +mova za.d[w11, 7], {z28.d - z31.d} // 11000000-00000100-01101111-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0x6f,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046f87 + +mova za.d[w8, 5, vgx4], 
{z16.d - z19.d} // 11000000-00000100-00001110-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e05 + +mova za.d[w8, 5], {z16.d - z19.d} // 11000000-00000100-00001110-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e05 + +mova za.d[w8, 1, vgx4], {z0.d - z3.d} // 11000000-00000100-00001100-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c01 + +mova za.d[w8, 1], {z0.d - z3.d} // 11000000-00000100-00001100-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c01 + +mova za.d[w10, 0, vgx4], {z16.d - z19.d} // 11000000-00000100-01001110-00000000 +// CHECK-INST: mov za.d[w10, 0, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0x4e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044e00 + +mova za.d[w10, 0], {z16.d - z19.d} // 11000000-00000100-01001110-00000000 +// CHECK-INST: mov za.d[w10, 0, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0x4e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044e00 + +mova za.d[w8, 0, vgx4], {z12.d - z15.d} // 11000000-00000100-00001101-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x0d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040d80 + +mova za.d[w8, 0], {z12.d - z15.d} // 11000000-00000100-00001101-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x0d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040d80 + +mova za.d[w10, 1, vgx4], 
{z0.d - z3.d} // 11000000-00000100-01001100-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x4c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044c01 + +mova za.d[w10, 1], {z0.d - z3.d} // 11000000-00000100-01001100-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x4c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044c01 + +mova za.d[w8, 5, vgx4], {z20.d - z23.d} // 11000000-00000100-00001110-10000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e85 + +mova za.d[w8, 5], {z20.d - z23.d} // 11000000-00000100-00001110-10000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e85 + +mova za.d[w11, 2, vgx4], {z8.d - z11.d} // 11000000-00000100-01101101-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d02 + +mova za.d[w11, 2], {z8.d - z11.d} // 11000000-00000100-01101101-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d02 + +mova za.d[w9, 7, vgx4], {z12.d - z15.d} // 11000000-00000100-00101101-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x2d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042d87 + +mova za.d[w9, 7], {z12.d - z15.d} // 11000000-00000100-00101101-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x2d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042d87 + +// Aliases + +mov za.d[w8, 
0, vgx4], {z0.d - z3.d} // 11000000-00000100-00001100-00000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c00 + +mov za.d[w10, 5, vgx4], {z8.d - z11.d} // 11000000-00000100-01001101-00000101 +// CHECK-INST: mov za.d[w10, 5, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x05,0x4d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044d05 + +mov za.d[w11, 7, vgx4], {z12.d - z15.d} // 11000000-00000100-01101101-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d87 + +mov za.d[w11, 7, vgx4], {z28.d - z31.d} // 11000000-00000100-01101111-10000111 +// CHECK-INST: mov za.d[w11, 7, vgx4], { z28.d - z31.d } +// CHECK-ENCODING: [0x87,0x6f,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046f87 + +mov za.d[w8, 5, vgx4], {z16.d - z19.d} // 11000000-00000100-00001110-00000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x05,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e05 + +mov za.d[w8, 1, vgx4], {z0.d - z3.d} // 11000000-00000100-00001100-00000001 +// CHECK-INST: mov za.d[w8, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x0c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040c01 + +mov za.d[w10, 0, vgx4], {z16.d - z19.d} // 11000000-00000100-01001110-00000000 +// CHECK-INST: mov za.d[w10, 0, vgx4], { z16.d - z19.d } +// CHECK-ENCODING: [0x00,0x4e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044e00 + +mov za.d[w8, 0, vgx4], {z12.d - z15.d} // 11000000-00000100-00001101-10000000 +// CHECK-INST: mov za.d[w8, 0, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x80,0x0d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040d80 + 
+mov za.d[w10, 1, vgx4], {z0.d - z3.d} // 11000000-00000100-01001100-00000001 +// CHECK-INST: mov za.d[w10, 1, vgx4], { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0x4c,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044c01 + +mov za.d[w8, 5, vgx4], {z20.d - z23.d} // 11000000-00000100-00001110-10000101 +// CHECK-INST: mov za.d[w8, 5, vgx4], { z20.d - z23.d } +// CHECK-ENCODING: [0x85,0x0e,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040e85 + +mov za.d[w11, 2, vgx4], {z8.d - z11.d} // 11000000-00000100-01101101-00000010 +// CHECK-INST: mov za.d[w11, 2, vgx4], { z8.d - z11.d } +// CHECK-ENCODING: [0x02,0x6d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046d02 + +mov za.d[w9, 7, vgx4], {z12.d - z15.d} // 11000000-00000100-00101101-10000111 +// CHECK-INST: mov za.d[w9, 7, vgx4], { z12.d - z15.d } +// CHECK-ENCODING: [0x87,0x2d,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042d87 + + +mova {z0.b - z3.b}, za0h.b[w12, 0:3] // 11000000-00000110-00000100-00000000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060400 + +mova {z20.b - z23.b}, za0h.b[w14, 8:11] // 11000000-00000110-01000100-01010100 +// CHECK-INST: mov { z20.b - z23.b }, za0h.b[w14, 8:11] +// CHECK-ENCODING: [0x54,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064454 + +mova {z20.b - z23.b}, za0h.b[w15, 4:7] // 11000000-00000110-01100100-00110100 +// CHECK-INST: mov { z20.b - z23.b }, za0h.b[w15, 4:7] +// CHECK-ENCODING: [0x34,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066434 + +mova {z28.b - z31.b}, za0h.b[w15, 12:15] // 11000000-00000110-01100100-01111100 +// CHECK-INST: mov { z28.b - z31.b }, za0h.b[w15, 12:15] +// CHECK-ENCODING: [0x7c,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c006647c + +mova {z4.b - z7.b}, za0h.b[w12, 4:7] // 11000000-00000110-00000100-00100100 +// CHECK-INST: mov { z4.b - z7.b }, za0h.b[w12, 4:7] +// CHECK-ENCODING: [0x24,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060424 + +mova {z0.b - z3.b}, za0h.b[w12, 4:7] // 11000000-00000110-00000100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w12, 4:7] +// CHECK-ENCODING: [0x20,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060420 + +mova {z24.b - z27.b}, za0h.b[w14, 12:15] // 11000000-00000110-01000100-01111000 +// CHECK-INST: mov { z24.b - z27.b }, za0h.b[w14, 12:15] +// CHECK-ENCODING: [0x78,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064478 + +mova {z16.b - z19.b}, za0h.b[w14, 4:7] // 11000000-00000110-01000100-00110000 +// CHECK-INST: mov { z16.b - z19.b }, za0h.b[w14, 4:7] +// CHECK-ENCODING: [0x30,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064430 + +mova {z28.b - z31.b}, za0h.b[w12, 8:11] // 11000000-00000110-00000100-01011100 +// CHECK-INST: mov { z28.b - z31.b }, za0h.b[w12, 8:11] +// CHECK-ENCODING: [0x5c,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006045c + +mova {z0.b - z3.b}, za0h.b[w15, 4:7] // 11000000-00000110-01100100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w15, 4:7] +// CHECK-ENCODING: [0x20,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066420 + +mova {z4.b - z7.b}, za0h.b[w13, 0:3] // 11000000-00000110-00100100-00000100 +// CHECK-INST: mov { z4.b - z7.b }, za0h.b[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062404 + +// Aliases + +mov {z0.b - z3.b}, za0h.b[w12, 0:3] // 11000000-00000110-00000100-00000000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w12, 0:3] +// CHECK-ENCODING: [0x00,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c0060400 + +mov {z20.b - z23.b}, za0h.b[w14, 8:11] // 11000000-00000110-01000100-01010100 +// CHECK-INST: mov { z20.b - z23.b }, za0h.b[w14, 8:11] +// CHECK-ENCODING: [0x54,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064454 + +mov {z20.b - z23.b}, za0h.b[w15, 4:7] // 11000000-00000110-01100100-00110100 +// CHECK-INST: mov { z20.b - z23.b }, za0h.b[w15, 4:7] +// CHECK-ENCODING: [0x34,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066434 + +mov {z28.b - z31.b}, za0h.b[w15, 12:15] // 11000000-00000110-01100100-01111100 +// CHECK-INST: mov { z28.b - z31.b }, za0h.b[w15, 12:15] +// CHECK-ENCODING: [0x7c,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006647c + +mov {z4.b - z7.b}, za0h.b[w12, 4:7] // 11000000-00000110-00000100-00100100 +// CHECK-INST: mov { z4.b - z7.b }, za0h.b[w12, 4:7] +// CHECK-ENCODING: [0x24,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060424 + +mov {z0.b - z3.b}, za0h.b[w12, 4:7] // 11000000-00000110-00000100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w12, 4:7] +// CHECK-ENCODING: [0x20,0x04,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0060420 + +mov {z24.b - z27.b}, za0h.b[w14, 12:15] // 11000000-00000110-01000100-01111000 +// CHECK-INST: mov { z24.b - z27.b }, za0h.b[w14, 12:15] +// CHECK-ENCODING: [0x78,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064478 + +mov {z16.b - z19.b}, za0h.b[w14, 4:7] // 11000000-00000110-01000100-00110000 +// CHECK-INST: mov { z16.b - z19.b }, za0h.b[w14, 4:7] +// CHECK-ENCODING: [0x30,0x44,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0064430 + +mov {z28.b - z31.b}, za0h.b[w12, 8:11] // 11000000-00000110-00000100-01011100 +// CHECK-INST: mov { z28.b - z31.b }, za0h.b[w12, 8:11] +// CHECK-ENCODING: [0x5c,0x04,0x06,0xc0] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c006045c + +mov {z0.b - z3.b}, za0h.b[w15, 4:7] // 11000000-00000110-01100100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0h.b[w15, 4:7] +// CHECK-ENCODING: [0x20,0x64,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0066420 + +mov {z4.b - z7.b}, za0h.b[w13, 0:3] // 11000000-00000110-00100100-00000100 +// CHECK-INST: mov { z4.b - z7.b }, za0h.b[w13, 0:3] +// CHECK-ENCODING: [0x04,0x24,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0062404 + + +mova {z0.b - z3.b}, za0v.b[w12, 0:3] // 11000000-00000110-10000100-00000000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068400 + +mova {z20.b - z23.b}, za0v.b[w14, 8:11] // 11000000-00000110-11000100-01010100 +// CHECK-INST: mov { z20.b - z23.b }, za0v.b[w14, 8:11] +// CHECK-ENCODING: [0x54,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c454 + +mova {z20.b - z23.b}, za0v.b[w15, 4:7] // 11000000-00000110-11100100-00110100 +// CHECK-INST: mov { z20.b - z23.b }, za0v.b[w15, 4:7] +// CHECK-ENCODING: [0x34,0xe4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e434 + +mova {z28.b - z31.b}, za0v.b[w15, 12:15] // 11000000-00000110-11100100-01111100 +// CHECK-INST: mov { z28.b - z31.b }, za0v.b[w15, 12:15] +// CHECK-ENCODING: [0x7c,0xe4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e47c + +mova {z4.b - z7.b}, za0v.b[w12, 4:7] // 11000000-00000110-10000100-00100100 +// CHECK-INST: mov { z4.b - z7.b }, za0v.b[w12, 4:7] +// CHECK-ENCODING: [0x24,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068424 + +mova {z0.b - z3.b}, za0v.b[w12, 4:7] // 11000000-00000110-10000100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w12, 4:7] +// CHECK-ENCODING: [0x20,0x84,0x06,0xc0] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c0068420 + +mova {z24.b - z27.b}, za0v.b[w14, 12:15] // 11000000-00000110-11000100-01111000 +// CHECK-INST: mov { z24.b - z27.b }, za0v.b[w14, 12:15] +// CHECK-ENCODING: [0x78,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c478 + +mova {z16.b - z19.b}, za0v.b[w14, 4:7] // 11000000-00000110-11000100-00110000 +// CHECK-INST: mov { z16.b - z19.b }, za0v.b[w14, 4:7] +// CHECK-ENCODING: [0x30,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c430 + +mova {z28.b - z31.b}, za0v.b[w12, 8:11] // 11000000-00000110-10000100-01011100 +// CHECK-INST: mov { z28.b - z31.b }, za0v.b[w12, 8:11] +// CHECK-ENCODING: [0x5c,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006845c + +mova {z0.b - z3.b}, za0v.b[w15, 4:7] // 11000000-00000110-11100100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w15, 4:7] +// CHECK-ENCODING: [0x20,0xe4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e420 + +mova {z4.b - z7.b}, za0v.b[w13, 0:3] // 11000000-00000110-10100100-00000100 +// CHECK-INST: mov { z4.b - z7.b }, za0v.b[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006a404 + +// Aliases + +mov {z0.b - z3.b}, za0v.b[w12, 0:3] // 11000000-00000110-10000100-00000000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w12, 0:3] +// CHECK-ENCODING: [0x00,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068400 + +mov {z20.b - z23.b}, za0v.b[w14, 8:11] // 11000000-00000110-11000100-01010100 +// CHECK-INST: mov { z20.b - z23.b }, za0v.b[w14, 8:11] +// CHECK-ENCODING: [0x54,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c454 + +mov {z20.b - z23.b}, za0v.b[w15, 4:7] // 11000000-00000110-11100100-00110100 +// CHECK-INST: mov { z20.b - z23.b }, za0v.b[w15, 4:7] +// CHECK-ENCODING: [0x34,0xe4,0x06,0xc0] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e434 + +mov {z28.b - z31.b}, za0v.b[w15, 12:15] // 11000000-00000110-11100100-01111100 +// CHECK-INST: mov { z28.b - z31.b }, za0v.b[w15, 12:15] +// CHECK-ENCODING: [0x7c,0xe4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e47c + +mov {z4.b - z7.b}, za0v.b[w12, 4:7] // 11000000-00000110-10000100-00100100 +// CHECK-INST: mov { z4.b - z7.b }, za0v.b[w12, 4:7] +// CHECK-ENCODING: [0x24,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068424 + +mov {z0.b - z3.b}, za0v.b[w12, 4:7] // 11000000-00000110-10000100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w12, 4:7] +// CHECK-ENCODING: [0x20,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0068420 + +mov {z24.b - z27.b}, za0v.b[w14, 12:15] // 11000000-00000110-11000100-01111000 +// CHECK-INST: mov { z24.b - z27.b }, za0v.b[w14, 12:15] +// CHECK-ENCODING: [0x78,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c478 + +mov {z16.b - z19.b}, za0v.b[w14, 4:7] // 11000000-00000110-11000100-00110000 +// CHECK-INST: mov { z16.b - z19.b }, za0v.b[w14, 4:7] +// CHECK-ENCODING: [0x30,0xc4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006c430 + +mov {z28.b - z31.b}, za0v.b[w12, 8:11] // 11000000-00000110-10000100-01011100 +// CHECK-INST: mov { z28.b - z31.b }, za0v.b[w12, 8:11] +// CHECK-ENCODING: [0x5c,0x84,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006845c + +mov {z0.b - z3.b}, za0v.b[w15, 4:7] // 11000000-00000110-11100100-00100000 +// CHECK-INST: mov { z0.b - z3.b }, za0v.b[w15, 4:7] +// CHECK-ENCODING: [0x20,0xe4,0x06,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006e420 + +mov {z4.b - z7.b}, za0v.b[w13, 0:3] // 11000000-00000110-10100100-00000100 +// CHECK-INST: mov { z4.b - z7.b }, za0v.b[w13, 0:3] +// CHECK-ENCODING: [0x04,0xa4,0x06,0xc0] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c006a404 + + +mova za0h.b[w12, 0:3], {z0.b - z3.b} // 11000000-00000100-00000100-00000000 +// CHECK-INST: mov za0h.b[w12, 0:3], { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x04,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040400 + +mova za0h.b[w14, 4:7], {z8.b - z11.b} // 11000000-00000100-01000101-00000001 +// CHECK-INST: mov za0h.b[w14, 4:7], { z8.b - z11.b } +// CHECK-ENCODING: [0x01,0x45,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044501 + +mova za0h.b[w15, 12:15], {z12.b - z15.b} // 11000000-00000100-01100101-10000011 +// CHECK-INST: mov za0h.b[w15, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0x65,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046583 + +mova za0h.b[w15, 12:15], {z28.b - z31.b} // 11000000-00000100-01100111-10000011 +// CHECK-INST: mov za0h.b[w15, 12:15], { z28.b - z31.b } +// CHECK-ENCODING: [0x83,0x67,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046783 + +mova za0h.b[w12, 4:7], {z16.b - z19.b} // 11000000-00000100-00000110-00000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x06,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040601 + +mova za0h.b[w12, 4:7], {z0.b - z3.b} // 11000000-00000100-00000100-00000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x04,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040401 + +mova za0h.b[w14, 0:3], {z16.b - z19.b} // 11000000-00000100-01000110-00000000 +// CHECK-INST: mov za0h.b[w14, 0:3], { z16.b - z19.b } +// CHECK-ENCODING: [0x00,0x46,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044600 + +mova za0h.b[w12, 0:3], {z12.b - z15.b} // 11000000-00000100-00000101-10000000 +// CHECK-INST: mov za0h.b[w12, 0:3], { z12.b - z15.b } +// CHECK-ENCODING: 
[0x80,0x05,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040580 + +mova za0h.b[w14, 4:7], {z0.b - z3.b} // 11000000-00000100-01000100-00000001 +// CHECK-INST: mov za0h.b[w14, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x44,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044401 + +mova za0h.b[w12, 4:7], {z20.b - z23.b} // 11000000-00000100-00000110-10000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z20.b - z23.b } +// CHECK-ENCODING: [0x81,0x06,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040681 + +mova za0h.b[w15, 8:11], {z8.b - z11.b} // 11000000-00000100-01100101-00000010 +// CHECK-INST: mov za0h.b[w15, 8:11], { z8.b - z11.b } +// CHECK-ENCODING: [0x02,0x65,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046502 + +mova za0h.b[w13, 12:15], {z12.b - z15.b} // 11000000-00000100-00100101-10000011 +// CHECK-INST: mov za0h.b[w13, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0x25,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042583 + +// Aliases + +mov za0h.b[w12, 0:3], {z0.b - z3.b} // 11000000-00000100-00000100-00000000 +// CHECK-INST: mov za0h.b[w12, 0:3], { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x04,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040400 + +mov za0h.b[w14, 4:7], {z8.b - z11.b} // 11000000-00000100-01000101-00000001 +// CHECK-INST: mov za0h.b[w14, 4:7], { z8.b - z11.b } +// CHECK-ENCODING: [0x01,0x45,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044501 + +mov za0h.b[w15, 12:15], {z12.b - z15.b} // 11000000-00000100-01100101-10000011 +// CHECK-INST: mov za0h.b[w15, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0x65,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046583 + +mov za0h.b[w15, 12:15], {z28.b - z31.b} // 11000000-00000100-01100111-10000011 +// CHECK-INST: mov za0h.b[w15, 12:15], { z28.b - z31.b } 
+// CHECK-ENCODING: [0x83,0x67,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046783 + +mov za0h.b[w12, 4:7], {z16.b - z19.b} // 11000000-00000100-00000110-00000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x06,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040601 + +mov za0h.b[w12, 4:7], {z0.b - z3.b} // 11000000-00000100-00000100-00000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x04,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040401 + +mov za0h.b[w14, 0:3], {z16.b - z19.b} // 11000000-00000100-01000110-00000000 +// CHECK-INST: mov za0h.b[w14, 0:3], { z16.b - z19.b } +// CHECK-ENCODING: [0x00,0x46,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044600 + +mov za0h.b[w12, 0:3], {z12.b - z15.b} // 11000000-00000100-00000101-10000000 +// CHECK-INST: mov za0h.b[w12, 0:3], { z12.b - z15.b } +// CHECK-ENCODING: [0x80,0x05,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040580 + +mov za0h.b[w14, 4:7], {z0.b - z3.b} // 11000000-00000100-01000100-00000001 +// CHECK-INST: mov za0h.b[w14, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x44,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0044401 + +mov za0h.b[w12, 4:7], {z20.b - z23.b} // 11000000-00000100-00000110-10000001 +// CHECK-INST: mov za0h.b[w12, 4:7], { z20.b - z23.b } +// CHECK-ENCODING: [0x81,0x06,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0040681 + +mov za0h.b[w15, 8:11], {z8.b - z11.b} // 11000000-00000100-01100101-00000010 +// CHECK-INST: mov za0h.b[w15, 8:11], { z8.b - z11.b } +// CHECK-ENCODING: [0x02,0x65,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0046502 + +mov za0h.b[w13, 12:15], {z12.b - z15.b} // 11000000-00000100-00100101-10000011 +// CHECK-INST: mov za0h.b[w13, 12:15], { z12.b - z15.b } +// 
CHECK-ENCODING: [0x83,0x25,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0042583 + + +mova za0v.b[w12, 0:3], {z0.b - z3.b} // 11000000-00000100-10000100-00000000 +// CHECK-INST: mov za0v.b[w12, 0:3], { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x84,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048400 + +mova za0v.b[w14, 4:7], {z8.b - z11.b} // 11000000-00000100-11000101-00000001 +// CHECK-INST: mov za0v.b[w14, 4:7], { z8.b - z11.b } +// CHECK-ENCODING: [0x01,0xc5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c501 + +mova za0v.b[w15, 12:15], {z12.b - z15.b} // 11000000-00000100-11100101-10000011 +// CHECK-INST: mov za0v.b[w15, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0xe5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e583 + +mova za0v.b[w15, 12:15], {z28.b - z31.b} // 11000000-00000100-11100111-10000011 +// CHECK-INST: mov za0v.b[w15, 12:15], { z28.b - z31.b } +// CHECK-ENCODING: [0x83,0xe7,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e783 + +mova za0v.b[w12, 4:7], {z16.b - z19.b} // 11000000-00000100-10000110-00000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x86,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048601 + +mova za0v.b[w12, 4:7], {z0.b - z3.b} // 11000000-00000100-10000100-00000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x84,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048401 + +mova za0v.b[w14, 0:3], {z16.b - z19.b} // 11000000-00000100-11000110-00000000 +// CHECK-INST: mov za0v.b[w14, 0:3], { z16.b - z19.b } +// CHECK-ENCODING: [0x00,0xc6,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c600 + +mova za0v.b[w12, 0:3], {z12.b - z15.b} // 11000000-00000100-10000101-10000000 +// CHECK-INST: mov za0v.b[w12, 0:3], { z12.b - 
z15.b } +// CHECK-ENCODING: [0x80,0x85,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048580 + +mova za0v.b[w14, 4:7], {z0.b - z3.b} // 11000000-00000100-11000100-00000001 +// CHECK-INST: mov za0v.b[w14, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0xc4,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c401 + +mova za0v.b[w12, 4:7], {z20.b - z23.b} // 11000000-00000100-10000110-10000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z20.b - z23.b } +// CHECK-ENCODING: [0x81,0x86,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048681 + +mova za0v.b[w15, 8:11], {z8.b - z11.b} // 11000000-00000100-11100101-00000010 +// CHECK-INST: mov za0v.b[w15, 8:11], { z8.b - z11.b } +// CHECK-ENCODING: [0x02,0xe5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e502 + +mova za0v.b[w13, 12:15], {z12.b - z15.b} // 11000000-00000100-10100101-10000011 +// CHECK-INST: mov za0v.b[w13, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0xa5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004a583 + +// Aliases + +mov za0v.b[w12, 0:3], {z0.b - z3.b} // 11000000-00000100-10000100-00000000 +// CHECK-INST: mov za0v.b[w12, 0:3], { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x84,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048400 + +mov za0v.b[w14, 4:7], {z8.b - z11.b} // 11000000-00000100-11000101-00000001 +// CHECK-INST: mov za0v.b[w14, 4:7], { z8.b - z11.b } +// CHECK-ENCODING: [0x01,0xc5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c501 + +mov za0v.b[w15, 12:15], {z12.b - z15.b} // 11000000-00000100-11100101-10000011 +// CHECK-INST: mov za0v.b[w15, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0xe5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e583 + +mov za0v.b[w15, 12:15], {z28.b - z31.b} // 11000000-00000100-11100111-10000011 +// CHECK-INST: mov 
za0v.b[w15, 12:15], { z28.b - z31.b } +// CHECK-ENCODING: [0x83,0xe7,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e783 + +mov za0v.b[w12, 4:7], {z16.b - z19.b} // 11000000-00000100-10000110-00000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x86,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048601 + +mov za0v.b[w12, 4:7], {z0.b - z3.b} // 11000000-00000100-10000100-00000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0x84,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048401 + +mov za0v.b[w14, 0:3], {z16.b - z19.b} // 11000000-00000100-11000110-00000000 +// CHECK-INST: mov za0v.b[w14, 0:3], { z16.b - z19.b } +// CHECK-ENCODING: [0x00,0xc6,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c600 + +mov za0v.b[w12, 0:3], {z12.b - z15.b} // 11000000-00000100-10000101-10000000 +// CHECK-INST: mov za0v.b[w12, 0:3], { z12.b - z15.b } +// CHECK-ENCODING: [0x80,0x85,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048580 + +mov za0v.b[w14, 4:7], {z0.b - z3.b} // 11000000-00000100-11000100-00000001 +// CHECK-INST: mov za0v.b[w14, 4:7], { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0xc4,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004c401 + +mov za0v.b[w12, 4:7], {z20.b - z23.b} // 11000000-00000100-10000110-10000001 +// CHECK-INST: mov za0v.b[w12, 4:7], { z20.b - z23.b } +// CHECK-ENCODING: [0x81,0x86,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0048681 + +mov za0v.b[w15, 8:11], {z8.b - z11.b} // 11000000-00000100-11100101-00000010 +// CHECK-INST: mov za0v.b[w15, 8:11], { z8.b - z11.b } +// CHECK-ENCODING: [0x02,0xe5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004e502 + +mov za0v.b[w13, 12:15], {z12.b - z15.b} // 11000000-00000100-10100101-10000011 +// CHECK-INST: mov 
za0v.b[w13, 12:15], { z12.b - z15.b } +// CHECK-ENCODING: [0x83,0xa5,0x04,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c004a583 + diff --git a/llvm/test/MC/AArch64/SME2/movt-diagnostics.s b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +// index must be a multiple of 8 in range [0, 56]. +// --------------------------------------------------------------------------// + +movt x0, zt0[57] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[57] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[58] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[58] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[64] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. +// CHECK-NEXT: movt x0, zt0[64] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +movt x0, zt0[72] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 8 in range [0, 56]. 
+// CHECK-NEXT: movt x0, zt0[72] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid zt0 register + +movt x0, zt1[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: unexpected token in argument list +// CHECK-NEXT: movt x0, zt1[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/movt.s b/llvm/test/MC/AArch64/SME2/movt.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/movt.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +movt x0, zt0[0] // 11000000-01001100-00000011-11100000 +// CHECK-INST: movt x0, zt0[0] +// CHECK-ENCODING: [0xe0,0x03,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c03e0 + +movt x21, zt0[40] // 11000000-01001100-01010011-11110101 +// CHECK-INST: movt x21, zt0[40] +// CHECK-ENCODING: [0xf5,0x53,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c53f5 + +movt x23, zt0[48] // 11000000-01001100-01100011-11110111 +// CHECK-INST: movt x23, zt0[48] +// CHECK-ENCODING: [0xf7,0x63,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c63f7 + +movt xzr, zt0[56] // 
11000000-01001100-01110011-11111111 +// CHECK-INST: movt xzr, zt0[56] +// CHECK-ENCODING: [0xff,0x73,0x4c,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04c73ff + + +movt zt0[0], x0 // 11000000-01001110-00000011-11100000 +// CHECK-INST: movt zt0[0], x0 +// CHECK-ENCODING: [0xe0,0x03,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e03e0 + +movt zt0[40], x21 // 11000000-01001110-01010011-11110101 +// CHECK-INST: movt zt0[40], x21 +// CHECK-ENCODING: [0xf5,0x53,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e53f5 + +movt zt0[48], x23 // 11000000-01001110-01100011-11110111 +// CHECK-INST: movt zt0[48], x23 +// CHECK-ENCODING: [0xf7,0x63,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e63f7 + +movt zt0[56], xzr // 11000000-01001110-01110011-11111111 +// CHECK-INST: movt zt0[56], xzr +// CHECK-ENCODING: [0xff,0x73,0x4e,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c04e73ff + diff --git a/llvm/test/MC/AArch64/SME2/sclamp-diagnostics.s b/llvm/test/MC/AArch64/SME2/sclamp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sclamp-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sclamp {z0.b-z2.b}, z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sclamp {z0.b-z2.b}, z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sclamp {z1.s-z2.s}, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: sclamp {z1.s-z2.s}, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid Register Suffix + +sclamp {z0.h-z1.h}, z0.h, z4.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sclamp {z0.h-z1.h}, z0.h, z4.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sclamp.s b/llvm/test/MC/AArch64/SME2/sclamp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sclamp.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sclamp {z0.h, z1.h}, z0.h, z0.h // 11000001-01100000-11000100-00000000 +// CHECK-INST: sclamp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xc4,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160c400 + +sclamp {z20.h, z21.h}, z10.h, z21.h // 11000001-01110101-11000101-01010100 +// CHECK-INST: sclamp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xc5,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175c554 + +sclamp {z22.h, z23.h}, z13.h, z8.h // 11000001-01101000-11000101-10110110 +// CHECK-INST: sclamp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb6,0xc5,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c168c5b6 + +sclamp {z30.h, z31.h}, z31.h, z31.h // 11000001-01111111-11000111-11111110 +// CHECK-INST: sclamp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfe,0xc7,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fc7fe + + +sclamp {z0.s, z1.s}, z0.s, z0.s // 11000001-10100000-11000100-00000000 +// CHECK-INST: sclamp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xc4,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0c400 + +sclamp {z20.s, z21.s}, z10.s, z21.s // 11000001-10110101-11000101-01010100 +// CHECK-INST: sclamp { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xc5,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5c554 + +sclamp {z22.s, z23.s}, z13.s, z8.s // 11000001-10101000-11000101-10110110 +// CHECK-INST: sclamp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb6,0xc5,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8c5b6 + +sclamp {z30.s, z31.s}, z31.s, z31.s // 11000001-10111111-11000111-11111110 +// CHECK-INST: sclamp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfe,0xc7,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfc7fe + + +sclamp {z0.d, z1.d}, z0.d, z0.d // 11000001-11100000-11000100-00000000 +// CHECK-INST: sclamp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xc4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0c400 + +sclamp {z20.d, z21.d}, z10.d, z21.d // 11000001-11110101-11000101-01010100 +// CHECK-INST: sclamp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xc5,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5c554 + +sclamp {z22.d, z23.d}, z13.d, z8.d // 11000001-11101000-11000101-10110110 +// CHECK-INST: sclamp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xc5,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8c5b6 + +sclamp {z30.d, z31.d}, z31.d, z31.d 
// 11000001-11111111-11000111-11111110 +// CHECK-INST: sclamp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xc7,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffc7fe + + +sclamp {z0.b, z1.b}, z0.b, z0.b // 11000001-00100000-11000100-00000000 +// CHECK-INST: sclamp { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xc4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120c400 + +sclamp {z20.b, z21.b}, z10.b, z21.b // 11000001-00110101-11000101-01010100 +// CHECK-INST: sclamp { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x54,0xc5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135c554 + +sclamp {z22.b, z23.b}, z13.b, z8.b // 11000001-00101000-11000101-10110110 +// CHECK-INST: sclamp { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb6,0xc5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128c5b6 + +sclamp {z30.b, z31.b}, z31.b, z31.b // 11000001-00111111-11000111-11111110 +// CHECK-INST: sclamp { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfe,0xc7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fc7fe + + +sclamp {z0.h - z3.h}, z0.h, z0.h // 11000001-01100000-11001100-00000000 +// CHECK-INST: sclamp { z0.h - z3.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xcc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160cc00 + +sclamp {z20.h - z23.h}, z10.h, z21.h // 11000001-01110101-11001101-01010100 +// CHECK-INST: sclamp { z20.h - z23.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xcd,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175cd54 + +sclamp {z20.h - z23.h}, z13.h, z8.h // 11000001-01101000-11001101-10110100 +// CHECK-INST: sclamp { z20.h - z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb4,0xcd,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168cdb4 + +sclamp {z28.h - z31.h}, z31.h, z31.h // 11000001-01111111-11001111-11111100 
+// CHECK-INST: sclamp { z28.h - z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfc,0xcf,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fcffc + + +sclamp {z0.s - z3.s}, z0.s, z0.s // 11000001-10100000-11001100-00000000 +// CHECK-INST: sclamp { z0.s - z3.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xcc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0cc00 + +sclamp {z20.s - z23.s}, z10.s, z21.s // 11000001-10110101-11001101-01010100 +// CHECK-INST: sclamp { z20.s - z23.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xcd,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5cd54 + +sclamp {z20.s - z23.s}, z13.s, z8.s // 11000001-10101000-11001101-10110100 +// CHECK-INST: sclamp { z20.s - z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb4,0xcd,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8cdb4 + +sclamp {z28.s - z31.s}, z31.s, z31.s // 11000001-10111111-11001111-11111100 +// CHECK-INST: sclamp { z28.s - z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfc,0xcf,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfcffc + + +sclamp {z0.d - z3.d}, z0.d, z0.d // 11000001-11100000-11001100-00000000 +// CHECK-INST: sclamp { z0.d - z3.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xcc,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0cc00 + +sclamp {z20.d - z23.d}, z10.d, z21.d // 11000001-11110101-11001101-01010100 +// CHECK-INST: sclamp { z20.d - z23.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xcd,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5cd54 + +sclamp {z20.d - z23.d}, z13.d, z8.d // 11000001-11101000-11001101-10110100 +// CHECK-INST: sclamp { z20.d - z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb4,0xcd,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8cdb4 + +sclamp {z28.d - z31.d}, z31.d, z31.d // 11000001-11111111-11001111-11111100 +// CHECK-INST: sclamp { z28.d 
- z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfc,0xcf,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffcffc + + +sclamp {z0.b - z3.b}, z0.b, z0.b // 11000001-00100000-11001100-00000000 +// CHECK-INST: sclamp { z0.b - z3.b }, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xcc,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120cc00 + +sclamp {z20.b - z23.b}, z10.b, z21.b // 11000001-00110101-11001101-01010100 +// CHECK-INST: sclamp { z20.b - z23.b }, z10.b, z21.b +// CHECK-ENCODING: [0x54,0xcd,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135cd54 + +sclamp {z20.b - z23.b}, z13.b, z8.b // 11000001-00101000-11001101-10110100 +// CHECK-INST: sclamp { z20.b - z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb4,0xcd,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128cdb4 + +sclamp {z28.b - z31.b}, z31.b, z31.b // 11000001-00111111-11001111-11111100 +// CHECK-INST: sclamp { z28.b - z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfc,0xcf,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fcffc + diff --git a/llvm/test/MC/AArch64/SME2/scvtf-diagnostics.s b/llvm/test/MC/AArch64/SME2/scvtf-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/scvtf-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +scvtf {z0.s-z3.s}, {z0.s-z4.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: scvtf {z0.s-z3.s}, {z0.s-z4.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +scvtf {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: scvtf {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +scvtf {z0.s-z3.s}, {z1.s-z5.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: scvtf {z0.s-z3.s}, {z1.s-z5.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +scvtf {z0.s-z3.s}, {z1.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: scvtf {z0.s-z3.s}, {z1.h-z3.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/scvtf.s b/llvm/test/MC/AArch64/SME2/scvtf.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/scvtf.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +scvtf {z0.s, z1.s}, {z0.s, z1.s} // 11000001-00100010-11100000-00000000 +// CHECK-INST: scvtf { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e000 + +scvtf {z20.s, z21.s}, {z10.s, z11.s} // 11000001-00100010-11100001-01010100 +// CHECK-INST: scvtf { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e154 + 
+scvtf {z22.s, z23.s}, {z12.s, z13.s} // 11000001-00100010-11100001-10010110 +// CHECK-INST: scvtf { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0x96,0xe1,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e196 + +scvtf {z30.s, z31.s}, {z30.s, z31.s} // 11000001-00100010-11100011-11011110 +// CHECK-INST: scvtf { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0xe3,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e3de + + +scvtf {z0.s - z3.s}, {z0.s - z3.s} // 11000001-00110010-11100000-00000000 +// CHECK-INST: scvtf { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e000 + +scvtf {z20.s - z23.s}, {z8.s - z11.s} // 11000001-00110010-11100001-00010100 +// CHECK-INST: scvtf { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e114 + +scvtf {z20.s - z23.s}, {z12.s - z15.s} // 11000001-00110010-11100001-10010100 +// CHECK-INST: scvtf { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e194 + +scvtf {z28.s - z31.s}, {z28.s - z31.s} // 11000001-00110010-11100011-10011100 +// CHECK-INST: scvtf { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e39c + diff --git a/llvm/test/MC/AArch64/SME2/sdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/sdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sdot-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid select register + +sdot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sdot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sdot za.s[w12, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sdot za.s[w12, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid select offset + +sdot za.s[w8, -1], {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: sdot za.s[w8, -1], {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sdot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: sdot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range element index + +sdot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: sdot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sdot za.s[w8, 0], {z0.h-z3.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: sdot za.s[w8, 0], {z0.h-z3.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// ZPR range constraint + +sdot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: sdot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sdot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: sdot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// sdot (multi-single) + +sdot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: sdot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sdot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: sdot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sdot.s b/llvm/test/MC/AArch64/SME2/sdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sdot.s @@ -0,0 +1,2624 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 
< %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x08,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601408 + +sdot za.s[w8, 0], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x08,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601408 + +sdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x4d,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165554d + +sdot za.s[w10, 5], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x4d,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165554d + +sdot za.s[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10101111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xaf,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875af + +sdot za.s[w11, 7], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10101111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xaf,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875af + +sdot za.s[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11101111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xef,0x77,0x6f,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16f77ef + +sdot za.s[w11, 7], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11101111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xef,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77ef + +sdot za.s[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00101101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x2d,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160162d + +sdot za.s[w8, 5], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00101101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x2d,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160162d + +sdot za.s[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00101001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1429 + +sdot za.s[w8, 1], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00101001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1429 + +sdot za.s[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01101000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645668 + +sdot za.s[w10, 0], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01101000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645668 + +sdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 
0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621588 + +sdot za.s[w8, 0], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621588 + +sdot za.s[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00101001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5429 + +sdot za.s[w10, 1], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00101001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5429 + +sdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xcd,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16cd + +sdot za.s[w8, 5], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xcd,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16cd + +sdot za.s[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00101010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x2a,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161752a + +sdot za.s[w11, 2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00101010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x2a,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161752a + +sdot za.s[w9, 7, 
vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x8f,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b358f + +sdot za.s[w9, 7], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x8f,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b358f + + +sdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501000 + +sdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501000 + +sdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x45,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555545 + +sdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x45,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555545 + +sdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x87,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d87 + +sdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// 
CHECK-ENCODING: [0x87,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d87 + +sdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xc7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fc7 + +sdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xc7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fc7 + +sdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x05,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e05 + +sdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x05,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e05 + +sdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x01,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1401 + +sdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x01,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1401 + +sdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01000000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x40,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545640 + +sdot 
za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01000000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x40,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545640 + +sdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x80,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521980 + +sdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x80,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521980 + +sdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x01,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5801 + +sdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x01,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5801 + +sdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ac5 + +sdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ac5 + +sdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { 
z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x02,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517502 + +sdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x02,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517502 + +sdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x87,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b3987 + +sdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x87,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b3987 + + +sdot za.s[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01408 + +sdot za.s[w8, 0], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01408 + +sdot za.s[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x4d,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4554d + +sdot za.s[w10, 5], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x4d,0x55,0xf4,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4554d + +sdot za.s[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x8f,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8758f + +sdot za.s[w11, 7], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x8f,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8758f + +sdot za.s[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11001111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xcf,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77cf + +sdot za.s[w11, 7], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11001111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xcf,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77cf + +sdot za.s[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x0d,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f0160d + +sdot za.s[w8, 5], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x0d,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f0160d + +sdot za.s[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00001001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x09,0x14,0xfe,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1409 + +sdot za.s[w8, 1], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00001001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x09,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1409 + +sdot za.s[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01001000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45648 + +sdot za.s[w10, 0], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01001000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45648 + +sdot za.s[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21588 + +sdot za.s[w8, 0], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21588 + +sdot za.s[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00001001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x09,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5409 + +sdot za.s[w10, 1], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00001001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x09,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1fa5409 + +sdot za.s[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xcd,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16cd + +sdot za.s[w8, 5], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xcd,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16cd + +sdot za.s[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00001010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x0a,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0750a + +sdot za.s[w11, 2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00001010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x0a,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0750a + +sdot za.s[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x8f,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea358f + +sdot za.s[w9, 7], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x8f,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea358f + + + +sdot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201400 + +sdot za.s[w8, 0], 
{z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201400 + +sdot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255545 + +sdot za.s[w10, 5], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255545 + +sdot za.s[w11, 7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa7,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875a7 + +sdot za.s[w11, 7], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa7,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875a7 + +sdot za.s[w11, 7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe7,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77e7 + +sdot za.s[w11, 7], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe7,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77e7 + +sdot za.s[w8, 5, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: 
[0x25,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201625 + +sdot za.s[w8, 5], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x25,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201625 + +sdot za.s[w8, 1, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1421 + +sdot za.s[w8, 1], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1421 + +sdot za.s[w10, 0, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245660 + +sdot za.s[w10, 0], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245660 + +sdot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221580 + +sdot za.s[w8, 0], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221580 + +sdot za.s[w10, 1, vgx2], {z1.b, z2.b}, z10.b // 
11000001-00101010-01010100-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5421 + +sdot za.s[w10, 1], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5421 + +sdot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16c5 + +sdot za.s[w8, 5], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16c5 + +sdot za.s[w11, 2, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x22,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217522 + +sdot za.s[w11, 2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x22,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217522 + +sdot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x87,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3587 + +sdot za.s[w9, 7], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x87,0x35,0x2b,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c12b3587 + + +sdot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00100000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501020 + +sdot za.s[w8, 0], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00100000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501020 + +sdot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01100101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x65,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555565 + +sdot za.s[w10, 5], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01100101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x65,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555565 + +sdot za.s[w11, 7, vgx2], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587da7 + +sdot za.s[w11, 7], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587da7 + +sdot za.s[w11, 7, vgx2], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xe7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fe7 + +sdot za.s[w11, 7], {z30.b, z31.b}, z15.b[3] // 
11000001-01011111-01111111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xe7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fe7 + +sdot za.s[w8, 5, vgx2], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e25 + +sdot za.s[w8, 5], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e25 + +sdot za.s[w8, 1, vgx2], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1421 + +sdot za.s[w8, 1], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1421 + +sdot za.s[w10, 0, vgx2], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x60,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545660 + +sdot za.s[w10, 0], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x60,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545660 + +sdot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10100000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: 
[0xa0,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219a0 + +sdot za.s[w8, 0], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10100000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xa0,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219a0 + +sdot za.s[w10, 1, vgx2], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5821 + +sdot za.s[w10, 1], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5821 + +sdot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xe5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ae5 + +sdot za.s[w8, 5], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11100101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xe5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ae5 + +sdot za.s[w11, 2, vgx2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517522 + +sdot za.s[w11, 2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517522 + +sdot za.s[w9, 7, vgx2], {z12.b, 
z13.b}, z11.b[2] // 11000001-01011011-00111001-10100111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39a7 + +sdot za.s[w9, 7], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10100111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39a7 + + + +sdot za.s[w8, 0, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01400 + +sdot za.s[w8, 0], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01400 + +sdot za.s[w10, 5, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45545 + +sdot za.s[w10, 5], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45545 + +sdot za.s[w11, 7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01110101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x87,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87587 + +sdot za.s[w11, 7], {z12.b, z13.b}, {z8.b, z9.b} // 
11000001-10101000-01110101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x87,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87587 + +sdot za.s[w11, 7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01110111-11000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77c7 + +sdot za.s[w11, 7], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01110111-11000111 +// CHECK-INST: sdot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc7,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77c7 + +sdot za.s[w8, 5, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00010110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01605 + +sdot za.s[w8, 5], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00010110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01605 + +sdot za.s[w8, 1, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1401 + +sdot za.s[w8, 1], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1401 + +sdot za.s[w10, 0, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 
11000001-10110100-01010110-01000000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x40,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45640 + +sdot za.s[w10, 0], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01010110-01000000 +// CHECK-INST: sdot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x40,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45640 + +sdot za.s[w8, 0, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x15,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21580 + +sdot za.s[w8, 0], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x15,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21580 + +sdot za.s[w10, 1, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01010100-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5401 + +sdot za.s[w10, 1], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01010100-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5401 + +sdot za.s[w8, 5, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16c5 + +sdot za.s[w8, 5], {z22.b, z23.b}, {z30.b, z31.b} // 
11000001-10111110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16c5 + +sdot za.s[w11, 2, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x02,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07502 + +sdot za.s[w11, 2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x02,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07502 + +sdot za.s[w9, 7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x87,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3587 + +sdot za.s[w9, 7], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x87,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3587 + + +sdot za.d[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601400 + +sdot za.d[w8, 0], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601400 + +sdot za.d[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, 
z11.h }, z5.h +// CHECK-ENCODING: [0x45,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1655545 + +sdot za.d[w10, 5], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x45,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1655545 + +sdot za.d[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10100111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa7,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875a7 + +sdot za.d[w11, 7], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10100111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa7,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875a7 + +sdot za.d[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11100111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe7,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77e7 + +sdot za.d[w11, 7], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11100111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe7,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77e7 + +sdot za.d[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00100101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x25,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601625 + +sdot za.d[w8, 5], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00100101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x25,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601625 + +sdot za.d[w8, 1, vgx2], {z1.h, 
z2.h}, z14.h // 11000001-01101110-00010100-00100001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1421 + +sdot za.d[w8, 1], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00100001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1421 + +sdot za.d[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01100000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645660 + +sdot za.d[w10, 0], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01100000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645660 + +sdot za.d[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621580 + +sdot za.d[w8, 0], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621580 + +sdot za.d[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00100001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5421 + +sdot za.d[w10, 1], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00100001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x54,0x6a,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16a5421 + +sdot za.d[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc5,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16c5 + +sdot za.d[w8, 5], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc5,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16c5 + +sdot za.d[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00100010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x22,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1617522 + +sdot za.d[w11, 2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00100010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x22,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1617522 + +sdot za.d[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x87,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b3587 + +sdot za.d[w9, 7], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x87,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b3587 + + +sdot za.d[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-11010000-00000000-00001000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00008 + +sdot za.d[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-11010000-00000000-00001000 +// CHECK-INST: sdot 
za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00008 + +sdot za.d[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-11010101-01000101-01001101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x4d,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5454d + +sdot za.d[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-11010101-01000101-01001101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x4d,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5454d + +sdot za.d[w11, 7, vgx2], {z12.h, z13.h}, z8.h[1] // 11000001-11011000-01100101-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z12.h, z13.h }, z8.h[1] +// CHECK-ENCODING: [0x8f,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8658f + +sdot za.d[w11, 7], {z12.h, z13.h}, z8.h[1] // 11000001-11011000-01100101-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z12.h, z13.h }, z8.h[1] +// CHECK-ENCODING: [0x8f,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8658f + +sdot za.d[w11, 7, vgx2], {z30.h, z31.h}, z15.h[1] // 11000001-11011111-01100111-11001111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z30.h, z31.h }, z15.h[1] +// CHECK-ENCODING: [0xcf,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67cf + +sdot za.d[w11, 7], {z30.h, z31.h}, z15.h[1] // 11000001-11011111-01100111-11001111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z30.h, z31.h }, z15.h[1] +// CHECK-ENCODING: [0xcf,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67cf + +sdot za.d[w8, 5, vgx2], {z16.h, z17.h}, z0.h[1] // 11000001-11010000-00000110-00001101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z16.h, z17.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x06,0xd0,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1d0060d + +sdot za.d[w8, 5], {z16.h, z17.h}, z0.h[1] // 11000001-11010000-00000110-00001101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z16.h, z17.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0060d + +sdot za.d[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-11011110-00000100-00001001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0409 + +sdot za.d[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-11011110-00000100-00001001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0409 + +sdot za.d[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-11010100-01000110-01001000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44648 + +sdot za.d[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-11010100-01000110-01001000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x48,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44648 + +sdot za.d[w8, 0, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001-11010010-00000001-10001000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20188 + +sdot za.d[w8, 0], {z12.h, z13.h}, z2.h[0] // 11000001-11010010-00000001-10001000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20188 + +sdot za.d[w10, 1, vgx2], {z0.h, z1.h}, z10.h[0] // 
11000001-11011010-01000000-00001001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4009 + +sdot za.d[w10, 1], {z0.h, z1.h}, z10.h[0] // 11000001-11011010-01000000-00001001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4009 + +sdot za.d[w8, 5, vgx2], {z22.h, z23.h}, z14.h[0] // 11000001-11011110-00000010-11001101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h[0] +// CHECK-ENCODING: [0xcd,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02cd + +sdot za.d[w8, 5], {z22.h, z23.h}, z14.h[0] // 11000001-11011110-00000010-11001101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h[0] +// CHECK-ENCODING: [0xcd,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02cd + +sdot za.d[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-11010001-01100101-00001010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1650a + +sdot za.d[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-11010001-01100101-00001010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1650a + +sdot za.d[w9, 7, vgx2], {z12.h, z13.h}, z11.h[0] // 11000001-11011011-00100001-10001111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h[0] +// CHECK-ENCODING: [0x8f,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db218f + +sdot za.d[w9, 7], {z12.h, z13.h}, z11.h[0] // 11000001-11011011-00100001-10001111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h[0] +// CHECK-ENCODING: 
[0x8f,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db218f + + +sdot za.d[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01400 + +sdot za.d[w8, 0], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01400 + +sdot za.d[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x45,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45545 + +sdot za.d[w10, 5], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x45,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45545 + +sdot za.d[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x87,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e87587 + +sdot za.d[w11, 7], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x87,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e87587 + +sdot za.d[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11000111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x77,0xfe,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77c7 + +sdot za.d[w11, 7], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11000111 +// CHECK-INST: sdot za.d[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc7,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77c7 + +sdot za.d[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f01605 + +sdot za.d[w8, 5], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x05,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f01605 + +sdot za.d[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00000001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1401 + +sdot za.d[w8, 1], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00000001 +// CHECK-INST: sdot za.d[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1401 + +sdot za.d[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01000000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45640 + +sdot za.d[w10, 0], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01000000 +// CHECK-INST: sdot za.d[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x56,0xf4,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1f45640 + +sdot za.d[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21580 + +sdot za.d[w8, 0], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21580 + +sdot za.d[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00000001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5401 + +sdot za.d[w10, 1], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00000001 +// CHECK-INST: sdot za.d[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5401 + +sdot za.d[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc5,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16c5 + +sdot za.d[w8, 5], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc5,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16c5 + +sdot za.d[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00000010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x02,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1e07502 + +sdot za.d[w11, 2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00000010 +// CHECK-INST: sdot za.d[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x02,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e07502 + +sdot za.d[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x87,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea3587 + +sdot za.d[w9, 7], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x87,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea3587 + + +sdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701408 + +sdot za.s[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701408 + +sdot za.s[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x4d,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175554d + +sdot za.s[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01001101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x4d,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175554d + +sdot za.s[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 
11000001-01111000-01110101-10101111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xaf,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875af + +sdot za.s[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10101111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xaf,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875af + +sdot za.s[w11, 7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11101111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xef,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77ef + +sdot za.s[w11, 7], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11101111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xef,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77ef + +sdot za.s[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00101101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x2d,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c170162d + +sdot za.s[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00101101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x2d,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c170162d + +sdot za.s[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00101001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x29,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1429 + +sdot za.s[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00101001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: 
[0x29,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1429 + +sdot za.s[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01101000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745668 + +sdot za.s[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01101000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745668 + +sdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721588 + +sdot za.s[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721588 + +sdot za.s[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00101001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5429 + +sdot za.s[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00101001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5429 + +sdot za.s[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xcd,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16cd + +sdot za.s[w8, 5], {z22.h - z25.h}, z14.h // 
11000001-01111110-00010110-11001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xcd,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16cd + +sdot za.s[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00101010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x2a,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c171752a + +sdot za.s[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00101010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x2a,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c171752a + +sdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x8f,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b358f + +sdot za.s[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x8f,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b358f + + +sdot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509000 + +sdot za.s[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509000 + +sdot za.s[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: 
[0x05,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d505 + +sdot za.s[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x05,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d505 + +sdot za.s[w11, 7, vgx4], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x87,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd87 + +sdot za.s[w11, 7], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x87,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd87 + +sdot za.s[w11, 7, vgx4], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x87,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff87 + +sdot za.s[w11, 7], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x87,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff87 + +sdot za.s[w8, 5, vgx4], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x05,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e05 + +sdot za.s[w8, 5], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x05,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e05 + +sdot 
za.s[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x01,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9401 + +sdot za.s[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x01,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9401 + +sdot za.s[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00000000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x00,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d600 + +sdot za.s[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00000000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x00,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d600 + +sdot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x80,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529980 + +sdot za.s[w8, 0], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x80,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529980 + +sdot za.s[w10, 1, vgx4], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x01,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad801 + +sdot za.s[w10, 1], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00000001 +// CHECK-INST: sdot za.s[w10, 1, 
vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x01,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad801 + +sdot za.s[w8, 5, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a85 + +sdot za.s[w8, 5], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a85 + +sdot za.s[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x02,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f502 + +sdot za.s[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x02,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f502 + +sdot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x87,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb987 + +sdot za.s[w9, 7], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x87,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb987 + +sdot za.s[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x14,0xe1,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e11408 + +sdot za.s[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11408 + +sdot za.s[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00001101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x0d,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5550d + +sdot za.s[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00001101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x0d,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5550d + +sdot za.s[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x8f,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9758f + +sdot za.s[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x8f,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9758f + +sdot za.s[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x8f,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd778f + +sdot za.s[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10001111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x8f,0x77,0xfd,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd778f + +sdot za.s[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x0d,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f1160d + +sdot za.s[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x0d,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f1160d + +sdot za.s[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00001001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1409 + +sdot za.s[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00001001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1409 + +sdot za.s[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00001000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55608 + +sdot za.s[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00001000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55608 + +sdot za.s[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x15,0xe1,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11588 + +sdot za.s[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10001000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11588 + +sdot za.s[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00001001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95409 + +sdot za.s[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00001001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95409 + +sdot za.s[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x8d,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd168d + +sdot za.s[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10001101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x8d,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd168d + +sdot za.s[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00001010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x0a,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e1750a + +sdot za.s[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00001010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x0a,0x75,0xe1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e1750a + +sdot za.s[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x8f,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9358f + +sdot za.s[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10001111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x8f,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9358f + + +sdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301400 + +sdot za.s[w8, 0], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301400 + +sdot za.s[w10, 5, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355545 + +sdot za.s[w10, 5], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355545 + +sdot za.s[w11, 7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa7,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875a7 + +sdot za.s[w11, 7], {z13.b - z16.b}, z8.b 
// 11000001-00111000-01110101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa7,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875a7 + +sdot za.s[w11, 7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe7,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77e7 + +sdot za.s[w11, 7], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe7,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77e7 + +sdot za.s[w8, 5, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301625 + +sdot za.s[w8, 5], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301625 + +sdot za.s[w8, 1, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1421 + +sdot za.s[w8, 1], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1421 + +sdot za.s[w10, 0, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: 
[0x60,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345660 + +sdot za.s[w10, 0], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01100000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x60,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345660 + +sdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321580 + +sdot za.s[w8, 0], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321580 + +sdot za.s[w10, 1, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5421 + +sdot za.s[w10, 1], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5421 + +sdot za.s[w8, 5, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16c5 + +sdot za.s[w8, 5], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16c5 + +sdot za.s[w11, 2, vgx4], {z9.b - z12.b}, z1.b // 
11000001-00110001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x22,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317522 + +sdot za.s[w11, 2], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x22,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317522 + +sdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x87,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3587 + +sdot za.s[w9, 7], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x87,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3587 + + +sdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00100000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509020 + +sdot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00100000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509020 + +sdot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00100101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x25,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d525 + +sdot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00100101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: 
[0x25,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d525 + +sdot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fda7 + +sdot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fda7 + +sdot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xa7,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffa7 + +sdot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10100111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xa7,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffa7 + +sdot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e25 + +sdot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00100101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e25 + +sdot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9421 + +sdot 
za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00100001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9421 + +sdot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00100000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x20,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d620 + +sdot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00100000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x20,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d620 + +sdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10100000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa0,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299a0 + +sdot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10100000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa0,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299a0 + +sdot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad821 + +sdot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00100001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad821 + +sdot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10100101 +// CHECK-INST: sdot za.s[w8, 5, 
vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xa5,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9aa5 + +sdot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10100101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xa5,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9aa5 + +sdot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f522 + +sdot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00100010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f522 + +sdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10100111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9a7 + +sdot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10100111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9a7 + + +sdot za.s[w8, 0, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x14,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11400 + +sdot za.s[w8, 0], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x14,0xa1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11400 + +sdot za.s[w10, 5, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x05,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55505 + +sdot za.s[w10, 5], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00000101 +// CHECK-INST: sdot za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x05,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55505 + +sdot za.s[w11, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x87,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97587 + +sdot za.s[w11, 7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x87,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97587 + +sdot za.s[w11, 7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x87,0x77,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7787 + +sdot za.s[w11, 7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10000111 +// CHECK-INST: sdot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x87,0x77,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7787 + +sdot za.s[w8, 5, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00010110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: 
[0x05,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11605 + +sdot za.s[w8, 5], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00010110-00000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x05,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11605 + +sdot za.s[w8, 1, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1401 + +sdot za.s[w8, 1], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00000001 +// CHECK-INST: sdot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1401 + +sdot za.s[w10, 0, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00000000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x56,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55600 + +sdot za.s[w10, 0], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00000000 +// CHECK-INST: sdot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x56,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55600 + +sdot za.s[w8, 0, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x80,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11580 + +sdot za.s[w8, 0], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10000000 +// CHECK-INST: sdot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: 
[0x80,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11580 + +sdot za.s[w10, 1, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01010100-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x01,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95401 + +sdot za.s[w10, 1], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01010100-00000001 +// CHECK-INST: sdot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x01,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95401 + +sdot za.s[w8, 5, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1685 + +sdot za.s[w8, 5], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10000101 +// CHECK-INST: sdot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1685 + +sdot za.s[w11, 2, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x02,0x75,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17502 + +sdot za.s[w11, 2], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00000010 +// CHECK-INST: sdot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x02,0x75,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17502 + +sdot za.s[w9, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: 
[0x87,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93587 + +sdot za.s[w9, 7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10000111 +// CHECK-INST: sdot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x87,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93587 + + +sdot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701400 + +sdot za.d[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701400 + +sdot za.d[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x45,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1755545 + +sdot za.d[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01000101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x45,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1755545 + +sdot za.d[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10100111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa7,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875a7 + +sdot za.d[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10100111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa7,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875a7 + +sdot za.d[w11, 7, vgx4], {z31.h - z2.h}, 
z15.h // 11000001-01111111-01110111-11100111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe7,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77e7 + +sdot za.d[w11, 7], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11100111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe7,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77e7 + +sdot za.d[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00100101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x25,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701625 + +sdot za.d[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00100101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x25,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701625 + +sdot za.d[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00100001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1421 + +sdot za.d[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00100001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1421 + +sdot za.d[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01100000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745660 + +sdot za.d[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01100000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: 
[0x60,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745660 + +sdot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721580 + +sdot za.d[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721580 + +sdot za.d[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00100001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5421 + +sdot za.d[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00100001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5421 + +sdot za.d[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc5,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16c5 + +sdot za.d[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc5,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16c5 + +sdot za.d[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00100010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x22,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1717522 + +sdot za.d[w11, 2], {z9.h - z12.h}, z1.h // 
11000001-01110001-01110101-00100010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x22,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1717522 + +sdot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x87,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b3587 + +sdot za.d[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x87,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b3587 + + +sdot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10000000-00001000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08008 + +sdot za.d[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10000000-00001000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08008 + +sdot za.d[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11000101-00001101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c50d + +sdot za.d[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11000101-00001101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c50d + +sdot za.d[w11, 7, vgx4], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11100101-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: 
[0x8f,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e58f + +sdot za.d[w11, 7], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11100101-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x8f,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e58f + +sdot za.d[w11, 7, vgx4], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11100111-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x8f,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe78f + +sdot za.d[w11, 7], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11100111-10001111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x8f,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe78f + +sdot za.d[w8, 5, vgx4], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10000110-00001101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0860d + +sdot za.d[w8, 5], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10000110-00001101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0860d + +sdot za.d[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10000100-00001001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8409 + +sdot za.d[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10000100-00001001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8409 + +sdot za.d[w10, 
0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11000110-00001000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c608 + +sdot za.d[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11000110-00001000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c608 + +sdot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10000001-10001000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28188 + +sdot za.d[w8, 0], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10000001-10001000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28188 + +sdot za.d[w10, 1, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11000000-00001001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac009 + +sdot za.d[w10, 1], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11000000-00001001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac009 + +sdot za.d[w8, 5, vgx4], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10000010-10001101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x8d,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de828d + +sdot za.d[w8, 5], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10000010-10001101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], 
{ z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x8d,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de828d + +sdot za.d[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11100101-00001010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e50a + +sdot za.d[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11100101-00001010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e50a + +sdot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10100001-10001111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x8f,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba18f + +sdot za.d[w9, 7], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10100001-10001111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x8f,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba18f + + +sdot za.d[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11400 + +sdot za.d[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11400 + +sdot za.d[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00000101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x05,0x55,0xf5,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55505 + +sdot za.d[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00000101 +// CHECK-INST: sdot za.d[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x05,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55505 + +sdot za.d[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e97587 + +sdot za.d[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e97587 + +sdot za.d[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x87,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd7787 + +sdot za.d[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10000111 +// CHECK-INST: sdot za.d[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x87,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd7787 + +sdot za.d[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x05,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f11605 + +sdot za.d[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: 
[0x05,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f11605 + +sdot za.d[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00000001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1401 + +sdot za.d[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00000001 +// CHECK-INST: sdot za.d[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1401 + +sdot za.d[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00000000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55600 + +sdot za.d[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00000000 +// CHECK-INST: sdot za.d[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55600 + +sdot za.d[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11580 + +sdot za.d[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10000000 +// CHECK-INST: sdot za.d[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11580 + +sdot za.d[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00000001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: 
[0x01,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95401 + +sdot za.d[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00000001 +// CHECK-INST: sdot za.d[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95401 + +sdot za.d[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x85,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1685 + +sdot za.d[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10000101 +// CHECK-INST: sdot za.d[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x85,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1685 + +sdot za.d[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00000010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e17502 + +sdot za.d[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00000010 +// CHECK-INST: sdot za.d[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e17502 + +sdot za.d[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x87,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e93587 + +sdot za.d[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10000111 +// CHECK-INST: sdot za.d[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: 
[0x87,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e93587 + diff --git a/llvm/test/MC/AArch64/SME2/sel-diagnostics.s b/llvm/test/MC/AArch64/SME2/sel-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sel-diagnostics.s @@ -0,0 +1,37 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sel {z0.h-z2.h}, pn8, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sel {z0.h-z2.h}, pn8, {z0.h-z1.h}, {z0.h-z1.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sel {z28.s-z31.s}, pn15, {z26.s-z31.s}, {z28.s-z31.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sel {z28.s-z31.s}, pn15, {z26.s-z31.s}, {z28.s-z31.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sel {z28.d-z31.d}, pn15, {z28.d-z31.d}, {z26.d-z31.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sel {z28.d-z31.d}, pn15, {z28.d-z31.d}, {z26.d-z31.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sel {z1.b-z4.b}, pn8, {z0.b-z3.b}, {z0.b-z3.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: sel {z1.b-z4.b}, pn8, {z0.b-z3.b}, {z0.b-z3.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sel {z22.s-z23.s}, pn11, {z13.s-z14.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: sel {z22.s-z23.s}, pn11, {z13.s-z14.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sel 
{z0.h-z1.h}, pn8, {z0.h-z1.h}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sel {z0.h-z1.h}, pn8, {z0.h-z1.h}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sel.s b/llvm/test/MC/AArch64/SME2/sel.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sel.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sel {z0.h, z1.h}, pn8, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10000000-00000000 +// CHECK-INST: sel { z0.h, z1.h }, pn8, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x80,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1608000 + +sel {z20.h, z21.h}, pn13, {z10.h, z11.h}, {z20.h, z21.h} // 11000001-01110100-10010101-01010100 +// CHECK-INST: sel { z20.h, z21.h }, pn13, { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x54,0x95,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1749554 + +sel {z22.h, z23.h}, pn11, {z12.h, z13.h}, {z8.h, z9.h} // 11000001-01101000-10001101-10010110 +// CHECK-INST: sel { z22.h, z23.h }, pn11, { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x96,0x8d,0x68,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1688d96 + +sel {z30.h, z31.h}, pn15, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10011111-11011110 +// CHECK-INST: sel { z30.h, z31.h }, pn15, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xde,0x9f,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e9fde + + +sel {z0.s, z1.s}, pn8, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10000000-00000000 +// CHECK-INST: sel { z0.s, z1.s }, pn8, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0x80,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a08000 + +sel {z20.s, z21.s}, pn13, {z10.s, z11.s}, {z20.s, z21.s} // 11000001-10110100-10010101-01010100 +// CHECK-INST: sel { z20.s, z21.s }, pn13, { z10.s, z11.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x54,0x95,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b49554 + +sel {z22.s, z23.s}, pn11, {z12.s, z13.s}, {z8.s, z9.s} // 11000001-10101000-10001101-10010110 +// CHECK-INST: sel { z22.s, z23.s }, pn11, { z12.s, z13.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x96,0x8d,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a88d96 + +sel {z30.s, z31.s}, pn15, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10011111-11011110 +// CHECK-INST: sel { z30.s, z31.s }, pn15, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xde,0x9f,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be9fde + + +sel {z0.d, z1.d}, pn8, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10000000-00000000 +// CHECK-INST: sel { z0.d, z1.d }, pn8, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0x80,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e08000 + +sel {z20.d, z21.d}, pn13, {z10.d, z11.d}, {z20.d, z21.d} // 11000001-11110100-10010101-01010100 +// CHECK-INST: sel { z20.d, z21.d }, pn13, { z10.d, z11.d }, { z20.d, z21.d } +// CHECK-ENCODING: 
[0x54,0x95,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f49554 + +sel {z22.d, z23.d}, pn11, {z12.d, z13.d}, {z8.d, z9.d} // 11000001-11101000-10001101-10010110 +// CHECK-INST: sel { z22.d, z23.d }, pn11, { z12.d, z13.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x96,0x8d,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e88d96 + +sel {z30.d, z31.d}, pn15, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10011111-11011110 +// CHECK-INST: sel { z30.d, z31.d }, pn15, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0xde,0x9f,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe9fde + + +sel {z0.b, z1.b}, pn8, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10000000-00000000 +// CHECK-INST: sel { z0.b, z1.b }, pn8, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x80,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1208000 + +sel {z20.b, z21.b}, pn13, {z10.b, z11.b}, {z20.b, z21.b} // 11000001-00110100-10010101-01010100 +// CHECK-INST: sel { z20.b, z21.b }, pn13, { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x54,0x95,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1349554 + +sel {z22.b, z23.b}, pn11, {z12.b, z13.b}, {z8.b, z9.b} // 11000001-00101000-10001101-10010110 +// CHECK-INST: sel { z22.b, z23.b }, pn11, { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x96,0x8d,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1288d96 + +sel {z30.b, z31.b}, pn15, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10011111-11011110 +// CHECK-INST: sel { z30.b, z31.b }, pn15, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xde,0x9f,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e9fde + + +sel {z0.h - z3.h}, pn8, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100001-10000000-00000000 +// CHECK-INST: sel { z0.h - z3.h }, pn8, { z0.h - z3.h }, { z0.h - z3.h } 
+// CHECK-ENCODING: [0x00,0x80,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1618000 + +sel {z20.h - z23.h}, pn13, {z8.h - z11.h}, {z20.h - z23.h} // 11000001-01110101-10010101-00010100 +// CHECK-INST: sel { z20.h - z23.h }, pn13, { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0x95,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1759514 + +sel {z20.h - z23.h}, pn11, {z12.h - z15.h}, {z8.h - z11.h} // 11000001-01101001-10001101-10010100 +// CHECK-INST: sel { z20.h - z23.h }, pn11, { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x94,0x8d,0x69,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1698d94 + +sel {z28.h - z31.h}, pn15, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111101-10011111-10011100 +// CHECK-INST: sel { z28.h - z31.h }, pn15, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0x9f,0x7d,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17d9f9c + + +sel {z0.s - z3.s}, pn8, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100001-10000000-00000000 +// CHECK-INST: sel { z0.s - z3.s }, pn8, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0x80,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a18000 + +sel {z20.s - z23.s}, pn13, {z8.s - z11.s}, {z20.s - z23.s} // 11000001-10110101-10010101-00010100 +// CHECK-INST: sel { z20.s - z23.s }, pn13, { z8.s - z11.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x14,0x95,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b59514 + +sel {z20.s - z23.s}, pn11, {z12.s - z15.s}, {z8.s - z11.s} // 11000001-10101001-10001101-10010100 +// CHECK-INST: sel { z20.s - z23.s }, pn11, { z12.s - z15.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x94,0x8d,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a98d94 + +sel {z28.s - z31.s}, pn15, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111101-10011111-10011100 +// CHECK-INST: 
sel { z28.s - z31.s }, pn15, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0x9f,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd9f9c + + +sel {z0.d - z3.d}, pn8, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100001-10000000-00000000 +// CHECK-INST: sel { z0.d - z3.d }, pn8, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0x80,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e18000 + +sel {z20.d - z23.d}, pn13, {z8.d - z11.d}, {z20.d - z23.d} // 11000001-11110101-10010101-00010100 +// CHECK-INST: sel { z20.d - z23.d }, pn13, { z8.d - z11.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x14,0x95,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f59514 + +sel {z20.d - z23.d}, pn11, {z12.d - z15.d}, {z8.d - z11.d} // 11000001-11101001-10001101-10010100 +// CHECK-INST: sel { z20.d - z23.d }, pn11, { z12.d - z15.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x94,0x8d,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e98d94 + +sel {z28.d - z31.d}, pn15, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111101-10011111-10011100 +// CHECK-INST: sel { z28.d - z31.d }, pn15, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0x9f,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd9f9c + + +sel {z0.b - z3.b}, pn8, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100001-10000000-00000000 +// CHECK-INST: sel { z0.b - z3.b }, pn8, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x80,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1218000 + +sel {z20.b - z23.b}, pn13, {z8.b - z11.b}, {z20.b - z23.b} // 11000001-00110101-10010101-00010100 +// CHECK-INST: sel { z20.b - z23.b }, pn13, { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x14,0x95,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1359514 + +sel {z20.b - z23.b}, pn11, {z12.b - z15.b}, {z8.b - z11.b} 
// 11000001-00101001-10001101-10010100 +// CHECK-INST: sel { z20.b - z23.b }, pn11, { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x94,0x8d,0x29,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1298d94 + +sel {z28.b - z31.b}, pn15, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111101-10011111-10011100 +// CHECK-INST: sel { z28.b - z31.b }, pn15, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9c,0x9f,0x3d,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13d9f9c + diff --git a/llvm/test/MC/AArch64/SME2/smax-diagnostics.s b/llvm/test/MC/AArch64/SME2/smax-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smax-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smax {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smax {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smax {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: smax {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +smax {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smax {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smax {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: 
Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smax {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smax.s b/llvm/test/MC/AArch64/SME2/smax.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smax.s @@ -0,0 +1,413 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smax {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100000-00000000 +// CHECK-INST: smax { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0xa0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a000 + +smax {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100000-00010100 +// CHECK-INST: smax { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x14,0xa0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a014 + +smax {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100000-00010110 +// CHECK-INST: smax { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x16,0xa0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a016 + +smax {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100000-00011110 +// 
CHECK-INST: smax { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x1e,0xa0,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa01e + + +smax {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110000-00000000 +// CHECK-INST: smax { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xb0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b000 + +smax {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110000-00010100 +// CHECK-INST: smax { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x14,0xb0,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b014 + +smax {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110000-00010110 +// CHECK-INST: smax { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x16,0xb0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b016 + +smax {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110000-00011110 +// CHECK-INST: smax { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x1e,0xb0,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb01e + + +smax {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100000-00000000 +// CHECK-INST: smax { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x00,0xa0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a000 + +smax {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100000-00010100 +// CHECK-INST: smax { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x14,0xa0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a014 + +smax {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100000-00010110 +// CHECK-INST: smax { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x16,0xa0,0xa8,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a016 + +smax {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100000-00011110 +// CHECK-INST: smax { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x1e,0xa0,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa01e + + +smax {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110000-00000000 +// CHECK-INST: smax { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xb0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b000 + +smax {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110000-00010100 +// CHECK-INST: smax { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x14,0xb0,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b014 + +smax {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110000-00010110 +// CHECK-INST: smax { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x16,0xb0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b016 + +smax {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110000-00011110 +// CHECK-INST: smax { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x1e,0xb0,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb01e + + +smax {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100000-00000000 +// CHECK-INST: smax { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x00,0xa0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a000 + +smax {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100000-00010100 +// CHECK-INST: smax { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x14,0xa0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a014 + +smax {z22.d, z23.d}, {z22.d, z23.d}, 
z8.d // 11000001-11101000-10100000-00010110 +// CHECK-INST: smax { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x16,0xa0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a016 + +smax {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100000-00011110 +// CHECK-INST: smax { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x1e,0xa0,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa01e + + +smax {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110000-00000000 +// CHECK-INST: smax { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0xb0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b000 + +smax {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110000-00010100 +// CHECK-INST: smax { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x14,0xb0,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b014 + +smax {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110000-00010110 +// CHECK-INST: smax { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x16,0xb0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b016 + +smax {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110000-00011110 +// CHECK-INST: smax { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x1e,0xb0,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb01e + + +smax {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100000-00000000 +// CHECK-INST: smax { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0xa0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a000 + +smax {z20.b, z21.b}, {z20.b, z21.b}, z5.b // 11000001-00100101-10100000-00010100 +// CHECK-INST: smax { z20.b, z21.b }, { z20.b, z21.b }, z5.b +// 
CHECK-ENCODING: [0x14,0xa0,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a014 + +smax {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100000-00010110 +// CHECK-INST: smax { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x16,0xa0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a016 + +smax {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100000-00011110 +// CHECK-INST: smax { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x1e,0xa0,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa01e + + +smax {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110000-00000000 +// CHECK-INST: smax { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0xb0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b000 + +smax {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110000-00010100 +// CHECK-INST: smax { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x14,0xb0,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b014 + +smax {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110000-00010110 +// CHECK-INST: smax { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x16,0xb0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b016 + +smax {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110000-00011110 +// CHECK-INST: smax { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x1e,0xb0,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb01e + + +smax {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101000-00000000 +// CHECK-INST: smax { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0xa8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c160a800 + +smax {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101000-00010100 +// CHECK-INST: smax { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x14,0xa8,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a814 + +smax {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101000-00010100 +// CHECK-INST: smax { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x14,0xa8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a814 + +smax {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101000-00011100 +// CHECK-INST: smax { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x1c,0xa8,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa81c + + +smax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111000-00000000 +// CHECK-INST: smax { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xb8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b800 + +smax {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111000-00010100 +// CHECK-INST: smax { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0xb8,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b814 + +smax {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111000-00010100 +// CHECK-INST: smax { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xb8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b814 + +smax {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111000-00011100 +// CHECK-INST: smax { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x1c,0xb8,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb81c + + +smax {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 
11000001-10100000-10101000-00000000 +// CHECK-INST: smax { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x00,0xa8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a800 + +smax {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101000-00010100 +// CHECK-INST: smax { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x14,0xa8,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a814 + +smax {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101000-00010100 +// CHECK-INST: smax { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x14,0xa8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a814 + +smax {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101000-00011100 +// CHECK-INST: smax { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x1c,0xa8,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa81c + + +smax {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111000-00000000 +// CHECK-INST: smax { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xb8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b800 + +smax {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111000-00010100 +// CHECK-INST: smax { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x14,0xb8,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b814 + +smax {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111000-00010100 +// CHECK-INST: smax { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xb8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b814 + +smax {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111000-00011100 +// CHECK-INST: smax { z28.s - z31.s }, { 
z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x1c,0xb8,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb81c + + +smax {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101000-00000000 +// CHECK-INST: smax { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x00,0xa8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a800 + +smax {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101000-00010100 +// CHECK-INST: smax { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x14,0xa8,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a814 + +smax {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101000-00010100 +// CHECK-INST: smax { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x14,0xa8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a814 + +smax {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101000-00011100 +// CHECK-INST: smax { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x1c,0xa8,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa81c + + +smax {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111000-00000000 +// CHECK-INST: smax { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xb8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b800 + +smax {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111000-00010100 +// CHECK-INST: smax { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x14,0xb8,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b814 + +smax {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111000-00010100 +// CHECK-INST: smax { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xb8,0xe8,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b814 + +smax {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111000-00011100 +// CHECK-INST: smax { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x1c,0xb8,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb81c + + +smax {z0.b - z3.b}, {z0.b - z3.b}, z0.b // 11000001-00100000-10101000-00000000 +// CHECK-INST: smax { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0xa8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a800 + +smax {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101000-00010100 +// CHECK-INST: smax { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x14,0xa8,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a814 + +smax {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101000-00010100 +// CHECK-INST: smax { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x14,0xa8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a814 + +smax {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101000-00011100 +// CHECK-INST: smax { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x1c,0xa8,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa81c + + +smax {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111000-00000000 +// CHECK-INST: smax { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0xb8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b800 + +smax {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111000-00010100 +// CHECK-INST: smax { z20.b - z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x14,0xb8,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b814 + +smax {z20.b - z23.b}, {z20.b - 
z23.b}, {z8.b - z11.b} // 11000001-00101000-10111000-00010100 +// CHECK-INST: smax { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x14,0xb8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b814 + +smax {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111000-00011100 +// CHECK-INST: smax { z28.b - z31.b }, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x1c,0xb8,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cb81c + diff --git a/llvm/test/MC/AArch64/SME2/smin-diagnostics.s b/llvm/test/MC/AArch64/SME2/smin-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smin-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smin {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smin {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smin {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: smin {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +smin {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smin {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smin {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smin {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smin.s b/llvm/test/MC/AArch64/SME2/smin.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smin.s @@ -0,0 +1,413 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smin {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100000-00100000 +// CHECK-INST: smin { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x20,0xa0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a020 + +smin {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100000-00110100 +// CHECK-INST: smin { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x34,0xa0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a034 + +smin {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100000-00110110 +// CHECK-INST: smin { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x36,0xa0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a036 + +smin {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100000-00111110 
+// CHECK-INST: smin { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3e,0xa0,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa03e + + +smin {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110000-00100000 +// CHECK-INST: smin { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x20,0xb0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b020 + +smin {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110000-00110100 +// CHECK-INST: smin { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x34,0xb0,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b034 + +smin {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110000-00110110 +// CHECK-INST: smin { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x36,0xb0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b036 + +smin {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110000-00111110 +// CHECK-INST: smin { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3e,0xb0,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb03e + + +smin {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100000-00100000 +// CHECK-INST: smin { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x20,0xa0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a020 + +smin {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100000-00110100 +// CHECK-INST: smin { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x34,0xa0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a034 + +smin {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100000-00110110 +// CHECK-INST: smin { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x36,0xa0,0xa8,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a036 + +smin {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100000-00111110 +// CHECK-INST: smin { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3e,0xa0,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa03e + + +smin {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110000-00100000 +// CHECK-INST: smin { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x20,0xb0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b020 + +smin {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110000-00110100 +// CHECK-INST: smin { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x34,0xb0,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b034 + +smin {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110000-00110110 +// CHECK-INST: smin { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x36,0xb0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b036 + +smin {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110000-00111110 +// CHECK-INST: smin { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3e,0xb0,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb03e + + +smin {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100000-00100000 +// CHECK-INST: smin { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x20,0xa0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a020 + +smin {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100000-00110100 +// CHECK-INST: smin { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x34,0xa0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a034 + +smin {z22.d, z23.d}, {z22.d, z23.d}, 
z8.d // 11000001-11101000-10100000-00110110 +// CHECK-INST: smin { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x36,0xa0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a036 + +smin {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100000-00111110 +// CHECK-INST: smin { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x3e,0xa0,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa03e + + +smin {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110000-00100000 +// CHECK-INST: smin { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x20,0xb0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b020 + +smin {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110000-00110100 +// CHECK-INST: smin { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x34,0xb0,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b034 + +smin {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110000-00110110 +// CHECK-INST: smin { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x36,0xb0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b036 + +smin {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110000-00111110 +// CHECK-INST: smin { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3e,0xb0,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb03e + + +smin {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100000-00100000 +// CHECK-INST: smin { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x20,0xa0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a020 + +smin {z20.b, z21.b}, {z20.b, z21.b}, z5.b // 11000001-00100101-10100000-00110100 +// CHECK-INST: smin { z20.b, z21.b }, { z20.b, z21.b }, z5.b +// 
CHECK-ENCODING: [0x34,0xa0,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a034 + +smin {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100000-00110110 +// CHECK-INST: smin { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x36,0xa0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a036 + +smin {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100000-00111110 +// CHECK-INST: smin { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x3e,0xa0,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa03e + + +smin {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110000-00100000 +// CHECK-INST: smin { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x20,0xb0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b020 + +smin {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110000-00110100 +// CHECK-INST: smin { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x34,0xb0,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b034 + +smin {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110000-00110110 +// CHECK-INST: smin { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x36,0xb0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b036 + +smin {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110000-00111110 +// CHECK-INST: smin { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x3e,0xb0,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb03e + + +smin {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101000-00100000 +// CHECK-INST: smin { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x20,0xa8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c160a820 + +smin {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101000-00110100 +// CHECK-INST: smin { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x34,0xa8,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a834 + +smin {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101000-00110100 +// CHECK-INST: smin { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x34,0xa8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a834 + +smin {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101000-00111100 +// CHECK-INST: smin { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3c,0xa8,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa83c + + +smin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111000-00100000 +// CHECK-INST: smin { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x20,0xb8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b820 + +smin {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111000-00110100 +// CHECK-INST: smin { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x34,0xb8,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b834 + +smin {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111000-00110100 +// CHECK-INST: smin { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x34,0xb8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b834 + +smin {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111000-00111100 +// CHECK-INST: smin { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x3c,0xb8,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb83c + + +smin {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 
11000001-10100000-10101000-00100000 +// CHECK-INST: smin { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x20,0xa8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a820 + +smin {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101000-00110100 +// CHECK-INST: smin { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x34,0xa8,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a834 + +smin {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101000-00110100 +// CHECK-INST: smin { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x34,0xa8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a834 + +smin {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101000-00111100 +// CHECK-INST: smin { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3c,0xa8,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa83c + + +smin {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111000-00100000 +// CHECK-INST: smin { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x20,0xb8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b820 + +smin {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111000-00110100 +// CHECK-INST: smin { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x34,0xb8,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b834 + +smin {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111000-00110100 +// CHECK-INST: smin { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x34,0xb8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b834 + +smin {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111000-00111100 +// CHECK-INST: smin { z28.s - z31.s }, { 
z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3c,0xb8,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb83c + + +smin {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101000-00100000 +// CHECK-INST: smin { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x20,0xa8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a820 + +smin {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101000-00110100 +// CHECK-INST: smin { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x34,0xa8,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a834 + +smin {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101000-00110100 +// CHECK-INST: smin { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x34,0xa8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a834 + +smin {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101000-00111100 +// CHECK-INST: smin { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3c,0xa8,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa83c + + +smin {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111000-00100000 +// CHECK-INST: smin { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x20,0xb8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b820 + +smin {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111000-00110100 +// CHECK-INST: smin { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x34,0xb8,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b834 + +smin {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111000-00110100 +// CHECK-INST: smin { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x34,0xb8,0xe8,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b834 + +smin {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111000-00111100 +// CHECK-INST: smin { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3c,0xb8,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb83c + + +smin {z0.b - z3.b}, {z0.b - z3.b}, z0.b // 11000001-00100000-10101000-00100000 +// CHECK-INST: smin { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x20,0xa8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a820 + +smin {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101000-00110100 +// CHECK-INST: smin { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x34,0xa8,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a834 + +smin {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101000-00110100 +// CHECK-INST: smin { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x34,0xa8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a834 + +smin {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101000-00111100 +// CHECK-INST: smin { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x3c,0xa8,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa83c + + +smin {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111000-00100000 +// CHECK-INST: smin { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x20,0xb8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b820 + +smin {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111000-00110100 +// CHECK-INST: smin { z20.b - z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x34,0xb8,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b834 + +smin {z20.b - z23.b}, {z20.b - 
z23.b}, {z8.b - z11.b} // 11000001-00101000-10111000-00110100 +// CHECK-INST: smin { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x34,0xb8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b834 + +smin {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111000-00111100 +// CHECK-INST: smin { z28.b - z31.b }, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x3c,0xb8,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cb83c + diff --git a/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s b/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s @@ -0,0 +1,79 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: smlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: smlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +smlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted 
vector register, expected z0.b..z15.b +// CHECK-NEXT: smlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: smlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +smlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: smlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w12, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: smlall za.s[w12, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +smlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. 
+// CHECK-NEXT: smlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: smlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s-e b/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlall-diagnostics.s-e @@ -0,0 +1,79 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: smlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w10, 4:7], {z8.b-z11.b}, 
{z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: smlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +smlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: smlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +smlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w12, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.s[w12, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +smlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>,
where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. +// CHECK-NEXT: smlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: smlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. 
+// CHECK-NEXT: smlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smlall.s b/llvm/test/MC/AArch64/SME2/smlall.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlall.s @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00000000 +// CHECK-INST: smlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x00,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200400 + +smlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01000001 +// CHECK-INST: smlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x41,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254541 + +smlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10100011 +// CHECK-INST: smlall za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xa3,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865a3 + +smlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11100011 +// CHECK-INST: smlall za.s[w11, 
12:15], z31.b, z15.b +// CHECK-ENCODING: [0xe3,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67e3 + +smlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x21,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200621 + +smlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x21,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0421 + +smlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01100000 +// CHECK-INST: smlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x60,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244660 + +smlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10000000 +// CHECK-INST: smlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x80,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220580 + +smlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00100001 +// CHECK-INST: smlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x21,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4421 + +smlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11000001 +// CHECK-INST: smlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc1,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c1 + +smlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00100010 +// CHECK-INST: smlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x22,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216522 + +smlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10000011 +// CHECK-INST: smlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x83,0x25,0x2b,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2583 + + +smlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x00,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000000 + +smlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01000001 +// CHECK-INST: smlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x41,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055541 + +smlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10100011 +// CHECK-INST: smlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xa3,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108eda3 + +smlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11100011 +// CHECK-INST: smlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xe3,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10fffe3 + +smlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x21,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e21 + +smlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x21,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8421 + +smlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01100000 +// CHECK-INST: smlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x60,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045660 + +smlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x80,0x19,0x02,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021980 + +smlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x21,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac821 + +smlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc1,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac1 + +smlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00100010 +// CHECK-INST: smlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x22,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f522 + +smlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10000011 +// CHECK-INST: smlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x83,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba983 + + +smlall za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00000000 +// CHECK-INST: smlall za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x00,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600400 + +smlall za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01000001 +// CHECK-INST: smlall za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x41,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654541 + +smlall za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10100011 +// CHECK-INST: smlall za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xa3,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865a3 + +smlall za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11100011 +// CHECK-INST: smlall za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xe3,0x67,0x6f,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16f67e3 + +smlall za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x21,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600621 + +smlall za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x21,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0421 + +smlall za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01100000 +// CHECK-INST: smlall za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x60,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644660 + +smlall za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10000000 +// CHECK-INST: smlall za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x80,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620580 + +smlall za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00100001 +// CHECK-INST: smlall za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x21,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4421 + +smlall za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11000001 +// CHECK-INST: smlall za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xc1,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06c1 + +smlall za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00100010 +// CHECK-INST: smlall za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x22,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616522 + +smlall za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10000011 +// CHECK-INST: smlall za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x83,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2583 + + 
+smlall za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800000 + +smlall za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01000001 +// CHECK-INST: smlall za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x41,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854541 + +smlall za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10100011 +// CHECK-INST: smlall za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xa3,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188eda3 + +smlall za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11100011 +// CHECK-INST: smlall za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xe3,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18fefe3 + +smlall za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x21,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e21 + +smlall za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x21,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8421 + +smlall za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01100000 +// CHECK-INST: smlall za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x60,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844660 + +smlall za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x80,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820980 + +smlall 
za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x21,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac821 + +smlall za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xc1,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ac1 + +smlall za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00100010 +// CHECK-INST: smlall za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x22,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e522 + +smlall za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10000011 +// CHECK-INST: smlall za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x83,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba983 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254141 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x25,0xc1]
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254141 + +smlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a1 + +smlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a1 + +smlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e1 + +smlall za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e1 + +smlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200221 + +smlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200221 + +smlall za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0021 + +smlall
za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0021 + +smlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244260 + +smlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244260 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220180 + +smlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220180 + +smlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4021 + +smlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4021 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11000001 +//
CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c1 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c1 + +smlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216120 + +smlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216120 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2181 + +smlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2181 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING:
[0x00,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x45,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154545 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x45,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154545 + +smlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d87 + +smlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d87 + +smlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001, 00011111, 01101111, 11000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xc7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fc7 + +smlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xc7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fc7 + +smlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001, 00010000, 00001110, 00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x0e,0x10,0xc1]
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e05 + +smlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e05 + +smlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0401 + +smlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0401 + +smlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x40,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144640 + +smlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x40,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144640 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120980 + +smlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1120980 + +smlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4801 + +smlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4801 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xc5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ac5 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xc5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ac5 + +smlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116502 + +smlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116502 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001, 00011011, 00101001, 10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2987 + +smlall
za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2987 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44141 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44141 + +smlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x81,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86181 + +smlall za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x81,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1a86181 + +smlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c1 + +smlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c1 + +smlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x01,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00201 + +smlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x01,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00201 + +smlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0001 + +smlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0001 + +smlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: 
[0x40,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44240 + +smlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x40,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44240 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20180 + +smlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20180 + +smlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4001 + +smlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4001 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c1 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { 
z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c1 + +smlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00000000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06100 + +smlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00000000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06100 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x81,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2181 + +smlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x81,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2181 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { 
z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654141 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654141 + +smlall za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a1 + +smlall za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a1 + +smlall za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e1 + +smlall za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e1 + +smlall za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00100001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600221 + +smlall za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00100001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x60,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1600221 + +smlall za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00100001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0021 + +smlall za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00100001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0021 + +smlall za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01100000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644260 + +smlall za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01100000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644260 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620180 + +smlall za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010, 01000000, 00100001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4021 + +smlall za.d[w10, 4:7], {z1.h - 
z2.h}, z10.h // 11000001-01101010-01000000-00100001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4021 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c1 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c1 + +smlall za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00100000 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616120 + +smlall za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00100000 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616120 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10000001 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2181 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10000001 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2181 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00000000 +// CHECK, INST: smlall 
za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01000101 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x45,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954545 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01000101 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x45,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954545 + +smlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10000111 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986587 + +smlall za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10000111 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986587 + +smlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11000111 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xc7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67c7 + +smlall za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11000111 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] 
+// CHECK-ENCODING: [0xc7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67c7 + +smlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00000101 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900605 + +smlall za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00000101 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900605 + +smlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0401 + +smlall za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0401 + +smlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01000000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x40,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944640 + +smlall za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01000000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x40,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944640 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x01,0x92,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920180 + +smlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4001 + +smlall za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4001 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11000101 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02c5 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11000101 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02c5 + +smlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00000010 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916502 + +smlall za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00000010 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1916502 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10000111 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2187 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10000111 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2187 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44141 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44141 + +smlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10000001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x81,0x61,0xe8,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e86181 + +smlall za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10000001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x81,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86181 + +smlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11000001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c1 + +smlall za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11000001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c1 + +smlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x01,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00201 + +smlall za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x01,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00201 + +smlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0001 + +smlall za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// 
CHECK-ENCODING: [0x01,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0001 + +smlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01000000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44240 + +smlall za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01000000 +// CHECK, INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44240 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20180 + +smlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4001 + +smlall za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4001 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11000001 +// CHECK, INST: smlall 
za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c1 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11000001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c1 + +smlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00000000 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06100 + +smlall za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00000000 +// CHECK, INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06100 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10000001 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x81,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2181 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10000001 +// CHECK, INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x81,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2181 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000000 +// CHECK-INST: 
smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300000 + +smlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354141 + +smlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354141 + +smlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a1 + +smlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a1 + +smlall za.s[w11, 4:7, vgx4], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e1 + +smlall za.s[w11, 4:7], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e1 + +smlall za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z17.b - z20.b 
}, z0.b +// CHECK-ENCODING: [0x21,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300221 + +smlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300221 + +smlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0021 + +smlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0021 + +smlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344260 + +smlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344260 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320180 + +smlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1320180 + +smlall za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4021 + +smlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4021 + +smlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c1 + +smlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c1 + +smlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316120 + +smlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316120 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2181 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 
11000001-00111011-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2181 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108000 + +smlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x05,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c505 + +smlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x05,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c505 + +smlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed87 + +smlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed87 + +smlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10000111 +// CHECK-INST: 
smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x87,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef87 + +smlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x87,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef87 + +smlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e05 + +smlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e05 + +smlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8401 + +smlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8401 + +smlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x00,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c600 + +smlall za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, 
z4.b[4] +// CHECK-ENCODING: [0x00,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c600 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128980 + +smlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128980 + +smlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac801 + +smlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac801 + +smlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x85,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a85 + +smlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x85,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a85 + +smlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0xe5,0x11,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e502 + +smlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e502 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba987 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba987 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10000 + +smlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x01,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54101 + +smlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: 
[0x01,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54101 + +smlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96181 + +smlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96181 + +smlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6381 + +smlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6381 + +smlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10201 + +smlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10201 + +smlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000001 +// CHECK-INST: smlall 
za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0001 + +smlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0001 + +smlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54200 + +smlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54200 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10180 + +smlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10180 + +smlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x01,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94001 + +smlall za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 
11000001-10111001-01000000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x01,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94001 + +smlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0281 + +smlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0281 + +smlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16100 + +smlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16100 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92181 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92181 + + +smlall 
za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700000 + +smlall za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754141 + +smlall za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754141 + +smlall za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a1 + +smlall za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a1 + +smlall za.d[w11, 4:7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e1 + +smlall za.d[w11, 4:7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 
11000001-01111111-01100011-11100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e1 + +smlall za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700221 + +smlall za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700221 + +smlall za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0021 + +smlall za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0021 + +smlall za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744260 + +smlall za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744260 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h 
- z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720180 + +smlall za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4021 + +smlall za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4021 + +smlall za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c1 + +smlall za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c1 + +smlall za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716120 + +smlall za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1716120 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2181 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2181 + + +smlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908000 + +smlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x05,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c505 + +smlall za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x05,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c505 + +smlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e587 + +smlall za.d[w11, 4:7], {z12.h - 
z15.h}, z8.h[7] // 11000001-10011000-11100101-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e587 + +smlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x87,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe787 + +smlall za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x87,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe787 + +smlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908605 + +smlall za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908605 + +smlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8401 + +smlall za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8401 + +smlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 
11000001-10010100-11000110-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x00,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c600 + +smlall za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x00,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c600 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928180 + +smlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac001 + +smlall za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac001 + +smlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8285 + +smlall za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10000101 +// CHECK-INST: 
smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8285 + +smlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e502 + +smlall za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e502 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba187 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba187 + + +smlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10000 + +smlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, 
vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x01,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54101 + +smlall za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x01,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54101 + +smlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96181 + +smlall za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96181 + +smlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6381 + +smlall za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6381 + +smlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10201 + +smlall za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 
11000001-11110001-00000010-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10201 + +smlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0001 + +smlall za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0001 + +smlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54200 + +smlall za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54200 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10180 + +smlall 
za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94001 + +smlall za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94001 + +smlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0281 + +smlall za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0281 + +smlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16100 + +smlall za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16100 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x21,0xe9,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e92181 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92181 + diff --git a/llvm/test/MC/AArch64/SME2/smlall.s-e b/llvm/test/MC/AArch64/SME2/smlall.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlall.s-e @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00000000 +// CHECK-INST: smlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x00,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200400 + +smlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01000001 +// CHECK-INST: smlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x41,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254541 + +smlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10100011 +// CHECK-INST: smlall za.s[w11, 12:15], 
z13.b, z8.b +// CHECK-ENCODING: [0xa3,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865a3 + +smlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11100011 +// CHECK-INST: smlall za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xe3,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67e3 + +smlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x21,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200621 + +smlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x21,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0421 + +smlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01100000 +// CHECK-INST: smlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x60,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244660 + +smlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10000000 +// CHECK-INST: smlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x80,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220580 + +smlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00100001 +// CHECK-INST: smlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x21,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4421 + +smlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11000001 +// CHECK-INST: smlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc1,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c1 + +smlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00100010 +// CHECK-INST: smlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x22,0x65,0x21,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216522 + +smlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10000011 +// CHECK-INST: smlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x83,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2583 + + +smlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x00,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000000 + +smlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01000001 +// CHECK-INST: smlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x41,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055541 + +smlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10100011 +// CHECK-INST: smlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xa3,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108eda3 + +smlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11100011 +// CHECK-INST: smlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xe3,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10fffe3 + +smlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x21,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e21 + +smlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00100001 +// CHECK-INST: smlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x21,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8421 + +smlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01100000 +// CHECK-INST: smlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x60,0x56,0x04,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045660 + +smlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x80,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021980 + +smlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x21,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac821 + +smlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc1,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac1 + +smlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00100010 +// CHECK-INST: smlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x22,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f522 + +smlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10000011 +// CHECK-INST: smlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x83,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba983 + + +smlall za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00000000 +// CHECK-INST: smlall za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x00,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600400 + +smlall za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01000001 +// CHECK-INST: smlall za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x41,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654541 + +smlall za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10100011 +// CHECK-INST: smlall za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xa3,0x65,0x68,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16865a3 + +smlall za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11100011 +// CHECK-INST: smlall za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xe3,0x67,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f67e3 + +smlall za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x21,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600621 + +smlall za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x21,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0421 + +smlall za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01100000 +// CHECK-INST: smlall za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x60,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644660 + +smlall za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10000000 +// CHECK-INST: smlall za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x80,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620580 + +smlall za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00100001 +// CHECK-INST: smlall za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x21,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4421 + +smlall za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11000001 +// CHECK-INST: smlall za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xc1,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06c1 + +smlall za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00100010 +// CHECK-INST: smlall za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x22,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616522 + 
+smlall za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10000011 +// CHECK-INST: smlall za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x83,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2583 + + +smlall za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800000 + +smlall za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01000001 +// CHECK-INST: smlall za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x41,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854541 + +smlall za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10100011 +// CHECK-INST: smlall za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xa3,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188eda3 + +smlall za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11100011 +// CHECK-INST: smlall za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xe3,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18fefe3 + +smlall za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x21,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e21 + +smlall za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00100001 +// CHECK-INST: smlall za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x21,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8421 + +smlall za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01100000 +// CHECK-INST: smlall za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x60,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844660 + +smlall 
za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x80,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820980 + +smlall za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x21,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac821 + +smlall za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xc1,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ac1 + +smlall za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00100010 +// CHECK-INST: smlall za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x22,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e522 + +smlall za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10000011 +// CHECK-INST: smlall za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x83,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba983 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x25,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1254141 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254141 + +smlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10100001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a1 + +smlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10100001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a1 + +smlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11100001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e1 + +smlall za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11100001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e1 + +smlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00100001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200221 + +smlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00100001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200221 + +smlall za.s[w8, 4:7, vgx2], 
{z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00100001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0021 + +smlall za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00100001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0021 + +smlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01100000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244260 + +smlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01100000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244260 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220180 + +smlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220180 + +smlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00100001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4021 + +smlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00100001 +// CHECK, INST: smlall za.s[w10, 
4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4021 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c1 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c1 + +smlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00100000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216120 + +smlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00100000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216120 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2181 + +smlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2181 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x00,0x10,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01000101 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x45,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154545 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01000101 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x45,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154545 + +smlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10000111 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d87 + +smlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10000111 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d87 + +smlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001, 00011111, 01101111, 11000111 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xc7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fc7 + +smlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11000111 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xc7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c11f6fc7 + +smlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001, 00010000, 00001110, 00000101 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e05 + +smlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00000101 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e05 + +smlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0401 + +smlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0401 + +smlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x40,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144640 + +smlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x40,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144640 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120980 + +smlall 
za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120980 + +smlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4801 + +smlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4801 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11000101 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xc5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ac5 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11000101 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xc5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ac5 + +smlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00000010 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116502 + +smlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00000010 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116502 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 
11000001, 00011011, 00101001, 10000111 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2987 + +smlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10000111 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2987 + + +smlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00000 + +smlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00000 + +smlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44141 + +smlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x41,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44141 + +smlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x81,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86181 + +smlall za.s[w11, 4:7], 
{z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x81,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86181 + +smlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c1 + +smlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11000001 +// CHECK, INST: smlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c1 + +smlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x01,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00201 + +smlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x01,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00201 + +smlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0001 + +smlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1be0001 + +smlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x40,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44240 + +smlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01000000 +// CHECK, INST: smlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x40,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44240 + +smlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20180 + +smlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10000000 +// CHECK, INST: smlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20180 + +smlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4001 + +smlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00000001 +// CHECK, INST: smlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x01,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4001 + +smlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: 
[0xc1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c1 + +smlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11000001 +// CHECK, INST: smlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c1 + +smlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00000000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06100 + +smlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00000000 +// CHECK, INST: smlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06100 + +smlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x81,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2181 + +smlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10000001 +// CHECK, INST: smlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x81,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2181 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00000000 +// CHECK, INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: 
[0x00,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654141 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01000001 +// CHECK, INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654141 + +smlall za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a1 + +smlall za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a1 + +smlall za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e1 + +smlall za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11100001 +// CHECK, INST: smlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e1 + +smlall za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00100001 +// CHECK, INST: smlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1600221 + +smlall za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600221 + +smlall za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0021 + +smlall za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0021 + +smlall za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644260 + +smlall za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644260 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620180 + +smlall za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010,
01000000, 00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4021 + +smlall za.d[w10, 4:7], {z1.h - z2.h}, z10.h // 11000001-01101010-01000000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4021 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c1 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c1 + +smlall za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616120 + +smlall za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616120 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2181 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +//
CHECK-ENCODING: [0x81,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2181 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x45,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954545 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x45,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954545 + +smlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986587 + +smlall za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986587 + +smlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xc7,0x67,0x9f,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67c7 + +smlall za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xc7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67c7 + +smlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900605 + +smlall za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900605 + +smlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0401 + +smlall za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0401 + +smlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x40,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944640 + +smlall za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x40,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1944640 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920180 + +smlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4001 + +smlall za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4001 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02c5 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xc5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02c5 + +smlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916502 + +smlall za.d[w11,
0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916502 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2187 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2187 + + +smlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00000 + +smlall za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00000 + +smlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44141 + +smlall za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x41,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44141 + +smlall za.d[w11,
4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x81,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86181 + +smlall za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x81,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86181 + +smlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c1 + +smlall za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c1 + +smlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x01,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00201 + +smlall za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x01,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00201 + +smlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction
requires: sme2 +// CHECK-UNKNOWN: c1fe0001 + +smlall za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0001 + +smlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44240 + +smlall za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x40,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44240 + +smlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20180 + +smlall za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20180 + +smlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x01,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4001 + +smlall za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING:
[0x01,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4001 + +smlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c1 + +smlall za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c1 + +smlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06100 + +smlall za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06100 + +smlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x81,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2181 + +smlall za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x81,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2181 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b
}, z0.b +// CHECK-ENCODING: [0x00,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x00,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300000 + +smlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354141 + +smlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x41,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354141 + +smlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a1 + +smlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a1 + +smlall za.s[w11, 4:7, vgx4], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e1 + +smlall za.s[w11, 4:7], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe1,0x63,0x3f,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e1 + +smlall za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300221 + +smlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x21,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300221 + +smlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0021 + +smlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x21,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0021 + +smlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344260 + +smlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x60,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344260 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320180 + +smlall za.s[w8, 0:3], {z12.b 
- z15.b}, z2.b // 11000001-00110010-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x80,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320180 + +smlall za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4021 + +smlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x21,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4021 + +smlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c1 + +smlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c1 + +smlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316120 + +smlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x20,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316120 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, 
vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2181 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x81,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2181 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x00,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108000 + +smlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x05,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c505 + +smlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00000101 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x05,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c505 + +smlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x87,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed87 + +smlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: 
[0x87,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed87 + +smlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x87,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef87 + +smlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10000111 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x87,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef87 + +smlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e05 + +smlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x05,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e05 + +smlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8401 + +smlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x01,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8401 + +smlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x00,0xc6,0x14,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c114c600 + +smlall za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x00,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c600 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128980 + +smlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x80,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128980 + +smlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac801 + +smlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x01,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac801 + +smlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x85,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a85 + +smlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10000101 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x85,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c11e8a85 + +smlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e502 + +smlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00000010 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x02,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e502 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba987 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10000111 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x87,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba987 + + +smlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10000 + +smlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10000 + +smlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x01,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54101 
+ +smlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x01,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54101 + +smlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96181 + +smlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96181 + +smlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6381 + +smlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000001 +// CHECK-INST: smlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6381 + +smlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10201 + +smlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x01,0x02,0xb1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10201 + +smlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0001 + +smlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x01,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0001 + +smlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54200 + +smlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000000 +// CHECK-INST: smlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x00,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54200 + +smlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10180 + +smlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000000 +// CHECK-INST: smlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x80,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10180 + +smlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - 
z27.b } +// CHECK-ENCODING: [0x01,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94001 + +smlall za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000001 +// CHECK-INST: smlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x01,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94001 + +smlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0281 + +smlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000001 +// CHECK-INST: smlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x81,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0281 + +smlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16100 + +smlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000000 +// CHECK-INST: smlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16100 + +smlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000001 +// CHECK-INST: smlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92181 + +smlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000001 +// CHECK-INST: smlall 
za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x81,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92181 + + +smlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x00,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700000 + +smlall za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754141 + +smlall za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x41,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754141 + +smlall za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a1 + +smlall za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a1 + +smlall za.d[w11, 4:7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// 
CHECK-ENCODING: [0xe1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e1 + +smlall za.d[w11, 4:7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11100001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e1 + +smlall za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700221 + +smlall za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x21,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700221 + +smlall za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0021 + +smlall za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00100001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x21,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0021 + +smlall za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744260 + +smlall za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01100000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x60,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1744260 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x80,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720180 + +smlall za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4021 + +smlall za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00100001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x21,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4021 + +smlall za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c1 + +smlall za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c1 + +smlall za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716120 + +smlall za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 
11000001-01110001-01100001-00100000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x20,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716120 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2181 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x81,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2181 + + +smlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x00,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908000 + +smlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x05,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c505 + +smlall za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00000101 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x05,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c505 + +smlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, 
vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e587 + +smlall za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x87,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e587 + +smlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x87,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe787 + +smlall za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10000111 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x87,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe787 + +smlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908605 + +smlall za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x05,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908605 + +smlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x01,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8401 + +smlall za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: 
[0x01,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8401 + +smlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x00,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c600 + +smlall za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x00,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c600 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x80,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928180 + +smlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac001 + +smlall za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x01,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac001 + +smlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c19e8285 + +smlall za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10000101 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x85,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8285 + +smlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e502 + +smlall za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00000010 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x02,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e502 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba187 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10000111 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x87,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba187 + + +smlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10000 + +smlall za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1e10000 + +smlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x01,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54101 + +smlall za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x01,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54101 + +smlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96181 + +smlall za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96181 + +smlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6381 + +smlall za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10000001 +// CHECK-INST: smlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6381 + +smlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: 
[0x01,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10201 + +smlall za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x01,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10201 + +smlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0001 + +smlall za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x01,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0001 + +smlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54200 + +smlall za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00000000 +// CHECK-INST: smlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x00,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54200 + +smlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10180 + +smlall za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10000000 +// CHECK-INST: smlall za.d[w8, 0:3, vgx4], { 
z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x80,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10180 + +smlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94001 + +smlall za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00000001 +// CHECK-INST: smlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x01,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94001 + +smlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0281 + +smlall za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10000001 +// CHECK-INST: smlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x81,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0281 + +smlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16100 + +smlall za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00000000 +// CHECK-INST: smlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16100 + +smlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 
11000001-11101001-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92181 + +smlall za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10000001 +// CHECK-INST: smlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x81,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92181 + diff --git a/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s b/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s @@ -0,0 +1,79 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: smlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: smlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +smlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, 
expected z0.b..z15.b +// CHECK-NEXT: smlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: smlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +smlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: smlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w12, 6:7, vgx4], {z12.b-z15.b}, z8.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: smlsll za.s[w12, 6:7, vgx4], {z12.b-z15.b}, z8.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +smlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form :, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. 
+// CHECK-NEXT: smlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: smlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s-e b/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlsll-diagnostics.s-e @@ -0,0 +1,79 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +smlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: smlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w10, 4:7], {z8.b-z11.b}, 
{z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: smlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +smlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: smlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: smlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +smlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w12, 6:7, vgx4], {z12.b-z15.b}, z8.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.s[w12, 6:7, vgx4], {z12.b-z15.b}, z8.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +smlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form :, where the 
first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. +// CHECK-NEXT: smlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +smlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: smlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. 
+// CHECK-NEXT: smlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smlsll.s b/llvm/test/MC/AArch64/SME2/smlsll.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlsll.s @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smlsll za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x08,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200408 + +smlsll za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x49,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254549 + +smlsll za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xab,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865ab + +smlsll za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11101011 +// CHECK-INST: smlsll za.s[w11, 
12:15], z31.b, z15.b +// CHECK-ENCODING: [0xeb,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67eb + +smlsll za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x29,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200629 + +smlsll za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x29,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0429 + +smlsll za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x68,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244668 + +smlsll za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x88,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220588 + +smlsll za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x29,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4429 + +smlsll za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc9,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c9 + +smlsll za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00101010 +// CHECK-INST: smlsll za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x2a,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121652a + +smlsll za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10001011 +// CHECK-INST: smlsll za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x8b,0x25,0x2b,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b258b + + +smlsll za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x08,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000008 + +smlsll za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x49,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055549 + +smlsll za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xab,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edab + +smlsll za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xeb,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10fffeb + +smlsll za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x29,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e29 + +smlsll za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x29,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8429 + +smlsll za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x68,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045668 + +smlsll za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x88,0x19,0x02,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021988 + +smlsll za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x29,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac829 + +smlsll za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc9,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac9 + +smlsll za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00101010 +// CHECK-INST: smlsll za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x2a,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f52a + +smlsll za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10001011 +// CHECK-INST: smlsll za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x8b,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba98b + + +smlsll za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x08,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600408 + +smlsll za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x49,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654549 + +smlsll za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xab,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865ab + +smlsll za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xeb,0x67,0x6f,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16f67eb + +smlsll za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x29,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600629 + +smlsll za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x29,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0429 + +smlsll za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x68,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644668 + +smlsll za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x88,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620588 + +smlsll za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x29,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4429 + +smlsll za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xc9,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06c9 + +smlsll za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00101010 +// CHECK-INST: smlsll za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x2a,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161652a + +smlsll za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10001011 +// CHECK-INST: smlsll za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x8b,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b258b + + 
+smlsll za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800008 + +smlsll za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x49,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854549 + +smlsll za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xab,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edab + +smlsll za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xeb,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18fefeb + +smlsll za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x29,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e29 + +smlsll za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x29,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8429 + +smlsll za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x68,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844668 + +smlsll za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x88,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820988 + +smlsll 
za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x29,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac829 + +smlsll za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xc9,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ac9 + +smlsll za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00101010 +// CHECK-INST: smlsll za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x2a,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e52a + +smlsll za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10001011 +// CHECK-INST: smlsll za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x8b,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba98b + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254149 + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x25,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254149 + +smlsll za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a9 + +smlsll za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a9 + +smlsll za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e9 + +smlsll za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e9 + +smlsll za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200229 + +smlsll za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200229 + +smlsll za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0029 + +smlsll 
za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0029 + +smlsll za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244268 + +smlsll za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244268 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220188 + +smlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220188 + +smlsll za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4029 + +smlsll za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4029 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00000010-11001001 +// CHECK-INST: 
smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c9 + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c9 + +smlsll za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216128 + +smlsll za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216128 + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2189 + +smlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2189 + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-00010000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: 
[0x08,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001-00010101-01000101-01001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x4d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115454d + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x4d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115454d + +smlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001-00011000-01101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d8f + +smlsll za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d8f + +smlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001-00011111-01101111-11001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xcf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fcf + +smlsll za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xcf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fcf + +smlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001-00010000-00001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x0e,0x10,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e0d + +smlsll za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e0d + +smlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001-00011110-00000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0409 + +smlsll za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0409 + +smlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001-00010100-01000110-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x48,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144648 + +smlsll za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x48,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144648 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001-00010010-00001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120988 + +smlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1120988 + +smlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001-00011010-01001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4809 + +smlsll za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4809 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001-00011110-00001010-11001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xcd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0acd + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xcd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0acd + +smlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001-00010001-01100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111650a + +smlsll za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111650a + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001-00011011-00101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b298f + +smlsll 
za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b298f + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x49,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44149 + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x49,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44149 + +smlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x89,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86189 + +smlsll za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x89,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1a86189 + +smlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01100011-11001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c9 + +smlsll za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c9 + +smlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x09,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00209 + +smlsll za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x09,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00209 + +smlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0009 + +smlsll za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0009 + +smlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01000010-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: 
[0x48,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44248 + +smlsll za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x48,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44248 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20188 + +smlsll za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20188 + +smlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4009 + +smlsll za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4009 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c9 + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { 
z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c9 + +smlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06108 + +smlsll za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06108 + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x89,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2189 + +smlsll za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x89,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2189 + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { 
z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654149 + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654149 + +smlsll za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a9 + +smlsll za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a9 + +smlsll za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e9 + +smlsll za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e9 + +smlsll za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600229 + +smlsll za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x60,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1600229 + +smlsll za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0029 + +smlsll za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0029 + +smlsll za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644268 + +smlsll za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644268 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620188 + +smlsll za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010, 01000000, 00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4029 + +smlsll za.d[w10, 4:7], {z1.h - 
z2.h}, z10.h // 11000001-01101010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4029 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c9 + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c9 + +smlsll za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616128 + +smlsll za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616128 + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2189 + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2189 + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00001000 +// CHECK-INST: smlsll 
za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x4d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195454d + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x4d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195454d + +smlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198658f + +smlsll za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198658f + +smlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xcf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67cf + +smlsll za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] 
+// CHECK-ENCODING: [0xcf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67cf + +smlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190060d + +smlsll za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190060d + +smlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0409 + +smlsll za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0409 + +smlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x48,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944648 + +smlsll za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x48,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944648 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0x92,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920188 + +smlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4009 + +smlsll za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4009 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02cd + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02cd + +smlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191650a + +smlsll za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c191650a + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b218f + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b218f + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x49,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44149 + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x49,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44149 + +smlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x89,0x61,0xe8,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e86189 + +smlsll za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x89,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86189 + +smlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c9 + +smlsll za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c9 + +smlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x09,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00209 + +smlsll za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x09,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00209 + +smlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0009 + +smlsll za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// 
CHECK-ENCODING: [0x09,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0009 + +smlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44248 + +smlsll za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44248 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20188 + +smlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x09,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4009 + +smlsll za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x09,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4009 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11001001 +// CHECK-INST: smlsll 
za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c9 + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c9 + +smlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06108 + +smlsll za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06108 + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x89,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2189 + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x89,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2189 + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00001000 +// CHECK-INST: 
smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300008 + +smlsll za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354149 + +smlsll za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354149 + +smlsll za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a9 + +smlsll za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a9 + +smlsll za.s[w11, 4:7, vgx4], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e9 + +smlsll za.s[w11, 4:7], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e9 + +smlsll za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b 
}, z0.b +// CHECK-ENCODING: [0x29,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300229 + +smlsll za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300229 + +smlsll za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0029 + +smlsll za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0029 + +smlsll za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344268 + +smlsll za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344268 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320188 + +smlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1320188 + +smlsll za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4029 + +smlsll za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4029 + +smlsll za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c9 + +smlsll za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c9 + +smlsll za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316128 + +smlsll za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316128 + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2189 + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 
11000001-00111011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2189 + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108008 + +smlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x0d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c50d + +smlsll za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x0d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c50d + +smlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed8f + +smlsll za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed8f + +smlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10001111 +// CHECK-INST: 
smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x8f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef8f + +smlsll za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x8f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef8f + +smlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e0d + +smlsll za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e0d + +smlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8409 + +smlsll za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8409 + +smlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x08,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c608 + +smlsll za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, 
z4.b[4] +// CHECK-ENCODING: [0x08,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c608 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128988 + +smlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128988 + +smlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac809 + +smlsll za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac809 + +smlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x8d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a8d + +smlsll za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x8d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a8d + +smlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0xe5,0x11,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e50a + +smlsll za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e50a + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba98f + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba98f + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10008 + +smlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x09,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54109 + +smlsll za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: 
[0x09,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54109 + +smlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96189 + +smlsll za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96189 + +smlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6389 + +smlsll za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6389 + +smlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x09,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10209 + +smlsll za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x09,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10209 + +smlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00001001 +// CHECK-INST: smlsll 
za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0009 + +smlsll za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0009 + +smlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54208 + +smlsll za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54208 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10188 + +smlsll za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10188 + +smlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x09,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94009 + +smlsll za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 
11000001-10111001-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x09,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94009 + +smlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0289 + +smlsll za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0289 + +smlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16108 + +smlsll za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16108 + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92189 + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92189 + + +smlsll 
za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700008 + +smlsll za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754149 + +smlsll za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754149 + +smlsll za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a9 + +smlsll za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a9 + +smlsll za.d[w11, 4:7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e9 + +smlsll za.d[w11, 4:7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 
11000001-01111111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e9 + +smlsll za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700229 + +smlsll za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700229 + +smlsll za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0029 + +smlsll za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0029 + +smlsll za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744268 + +smlsll za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744268 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h 
- z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720188 + +smlsll za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4029 + +smlsll za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4029 + +smlsll za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c9 + +smlsll za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c9 + +smlsll za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716128 + +smlsll za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1716128 + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2189 + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2189 + + +smlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908008 + +smlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x0d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c50d + +smlsll za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x0d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c50d + +smlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e58f + +smlsll za.d[w11, 4:7], {z12.h - 
z15.h}, z8.h[7] // 11000001-10011000-11100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e58f + +smlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x8f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe78f + +smlsll za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x8f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe78f + +smlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190860d + +smlsll za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190860d + +smlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8409 + +smlsll za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8409 + +smlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 
11000001-10010100-11000110-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x08,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c608 + +smlsll za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x08,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c608 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928188 + +smlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac009 + +smlsll za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac009 + +smlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e828d + +smlsll za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10001101 +// CHECK-INST: 
smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e828d + +smlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e50a + +smlsll za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e50a + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba18f + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba18f + + +smlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10008 + +smlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, 
vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x09,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54109 + +smlsll za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x09,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54109 + +smlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96189 + +smlsll za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96189 + +smlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6389 + +smlsll za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6389 + +smlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x09,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10209 + +smlsll za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 
11000001-11110001-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x09,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10209 + +smlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0009 + +smlsll za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0009 + +smlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54208 + +smlsll za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54208 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10188 + +smlsll 
za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94009 + +smlsll za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94009 + +smlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0289 + +smlsll za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0289 + +smlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16108 + +smlsll za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16108 + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x21,0xe9,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1e92189 + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92189 + diff --git a/llvm/test/MC/AArch64/SME2/smlsll.s-e b/llvm/test/MC/AArch64/SME2/smlsll.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smlsll.s-e @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smlsll za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x08,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200408 + +smlsll za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x49,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254549 + +smlsll za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10101011 +// CHECK-INST: smlsll za.s[w11, 12:15], 
z13.b, z8.b +// CHECK-ENCODING: [0xab,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865ab + +smlsll za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xeb,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67eb + +smlsll za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x29,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200629 + +smlsll za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x29,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0429 + +smlsll za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x68,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244668 + +smlsll za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x88,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220588 + +smlsll za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x29,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4429 + +smlsll za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc9,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c9 + +smlsll za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00101010 +// CHECK-INST: smlsll za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x2a,0x65,0x21,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121652a + +smlsll za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10001011 +// CHECK-INST: smlsll za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x8b,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b258b + + +smlsll za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x08,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000008 + +smlsll za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x49,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055549 + +smlsll za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xab,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edab + +smlsll za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11101011 +// CHECK-INST: smlsll za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xeb,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10fffeb + +smlsll za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x29,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e29 + +smlsll za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x29,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8429 + +smlsll za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x68,0x56,0x04,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045668 + +smlsll za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x88,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021988 + +smlsll za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x29,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac829 + +smlsll za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc9,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac9 + +smlsll za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00101010 +// CHECK-INST: smlsll za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x2a,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f52a + +smlsll za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10001011 +// CHECK-INST: smlsll za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x8b,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba98b + + +smlsll za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x08,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600408 + +smlsll za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x49,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654549 + +smlsll za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xab,0x65,0x68,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16865ab + +smlsll za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xeb,0x67,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f67eb + +smlsll za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x29,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600629 + +smlsll za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x29,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0429 + +smlsll za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x68,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644668 + +smlsll za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x88,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620588 + +smlsll za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x29,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4429 + +smlsll za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xc9,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06c9 + +smlsll za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00101010 +// CHECK-INST: smlsll za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x2a,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161652a + 
+smlsll za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10001011 +// CHECK-INST: smlsll za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x8b,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b258b + + +smlsll za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800008 + +smlsll za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x49,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854549 + +smlsll za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xab,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edab + +smlsll za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11101011 +// CHECK-INST: smlsll za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xeb,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18fefeb + +smlsll za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x29,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e29 + +smlsll za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x29,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8429 + +smlsll za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x68,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844668 + +smlsll 
za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x88,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820988 + +smlsll za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x29,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac829 + +smlsll za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xc9,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ac9 + +smlsll za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00101010 +// CHECK-INST: smlsll za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x2a,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e52a + +smlsll za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10001011 +// CHECK-INST: smlsll za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x8b,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba98b + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x25,0xc1] +// CHECK-ERROR:
instruction requires: sme2 +// CHECK-UNKNOWN: c1254149 + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254149 + +smlsll za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a9 + +smlsll za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a9 + +smlsll za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e9 + +smlsll za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e9 + +smlsll za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200229 + +smlsll za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200229 + +smlsll za.s[w8, 4:7, vgx2],
{z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0029 + +smlsll za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0029 + +smlsll za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244268 + +smlsll za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244268 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220188 + +smlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220188 + +smlsll za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4029 + +smlsll za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10,
4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4029 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c9 + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c9 + +smlsll za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216128 + +smlsll za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216128 + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2189 + +smlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2189 + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x00,0x10,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x4d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115454d + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x4d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115454d + +smlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d8f + +smlsll za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d8f + +smlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001, 00011111, 01101111, 11001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xcf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fcf + +smlsll za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xcf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires:
sme2 +// CHECK-UNKNOWN: c11f6fcf + +smlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001, 00010000, 00001110, 00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e0d + +smlsll za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e0d + +smlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0409 + +smlsll za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0409 + +smlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x48,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144648 + +smlsll za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x48,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144648 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120988 + +smlsll
za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120988 + +smlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4809 + +smlsll za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4809 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xcd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0acd + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xcd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0acd + +smlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111650a + +smlsll za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111650a + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] //
11000001, 00011011, 00101001, 10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b298f + +smlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b298f + + +smlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00008 + +smlsll za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00008 + +smlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x49,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44149 + +smlsll za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x49,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44149 + +smlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x89,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86189 + +smlsll za.s[w11, 4:7],
{z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x89,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86189 + +smlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c9 + +smlsll za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c9 + +smlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x09,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00209 + +smlsll za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x09,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00209 + +smlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0009 + +smlsll za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c1be0009 + +smlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x48,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44248 + +smlsll za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x48,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44248 + +smlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20188 + +smlsll za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20188 + +smlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4009 + +smlsll za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4009 + +smlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING:
[0xc9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c9 + +smlsll za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c9 + +smlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06108 + +smlsll za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06108 + +smlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x89,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2189 + +smlsll za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x89,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2189 + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING:
[0x08,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654149 + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654149 + +smlsll za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a9 + +smlsll za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861a9 + +smlsll za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e9 + +smlsll za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63e9 + +smlsll za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1600229 + +smlsll za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600229 + +smlsll za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0029 + +smlsll za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0029 + +smlsll za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644268 + +smlsll za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644268 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620188 + +smlsll za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010,
01000000, 00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4029 + +smlsll za.d[w10, 4:7], {z1.h - z2.h}, z10.h // 11000001-01101010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4029 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c9 + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02c9 + +smlsll za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616128 + +smlsll za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616128 + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2189 + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +//
CHECK-ENCODING: [0x89,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2189 + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x4d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195454d + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x4d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195454d + +smlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198658f + +smlsll za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198658f + +smlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xcf,0x67,0x9f,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67cf + +smlsll za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xcf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67cf + +smlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190060d + +smlsll za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190060d + +smlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0409 + +smlsll za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0409 + +smlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x48,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944648 + +smlsll za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x48,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1944648 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920188 + +smlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4009 + +smlsll za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4009 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02cd + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xcd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02cd + +smlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191650a + +smlsll za.d[w11,
0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191650a + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b218f + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b218f + + +smlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00008 + +smlsll za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00008 + +smlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x49,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44149 + +smlsll za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x49,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44149 + +smlsll za.d[w11,
4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x89,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86189 + +smlsll za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x89,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86189 + +smlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c9 + +smlsll za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63c9 + +smlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x09,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00209 + +smlsll za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x09,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00209 + +smlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction
requires: sme2 +// CHECK-UNKNOWN: c1fe0009 + +smlsll za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0009 + +smlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44248 + +smlsll za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x48,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44248 + +smlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20188 + +smlsll za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20188 + +smlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x09,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4009 + +smlsll za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING:
[0x09,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4009 + +smlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c9 + +smlsll za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xc9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02c9 + +smlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06108 + +smlsll za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x08,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06108 + +smlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x89,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2189 + +smlsll za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x89,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2189 + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b
}, z0.b +// CHECK-ENCODING: [0x08,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x08,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300008 + +smlsll za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354149 + +smlsll za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x49,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354149 + +smlsll za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a9 + +smlsll za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a9 + +smlsll za.s[w11, 4:7, vgx4], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e9 + +smlsll za.s[w11, 4:7], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11101001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe9,0x63,0x3f,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e9 + +smlsll za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300229 + +smlsll za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x29,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300229 + +smlsll za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0029 + +smlsll za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00101001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0029 + +smlsll za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344268 + +smlsll za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01101000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x68,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344268 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320188 + +smlsll za.s[w8, 0:3], {z12.b 
- z15.b}, z2.b // 11000001-00110010-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320188 + +smlsll za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4029 + +smlsll za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00101001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4029 + +smlsll za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c9 + +smlsll za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c9 + +smlsll za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316128 + +smlsll za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00101000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x28,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316128 + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, 
vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2189 + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x89,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2189 + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x08,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108008 + +smlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x0d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c50d + +smlsll za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00001101 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x0d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c50d + +smlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x8f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed8f + +smlsll za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: 
[0x8f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed8f + +smlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x8f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef8f + +smlsll za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10001111 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x8f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef8f + +smlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e0d + +smlsll za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x0d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e0d + +smlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8409 + +smlsll za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x09,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8409 + +smlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x08,0xc6,0x14,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c114c608 + +smlsll za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x08,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c608 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128988 + +smlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x88,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128988 + +smlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac809 + +smlsll za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x09,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac809 + +smlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x8d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a8d + +smlsll za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10001101 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x8d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c11e8a8d + +smlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e50a + +smlsll za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00001010 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x0a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e50a + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba98f + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10001111 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x8f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba98f + + +smlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10008 + +smlsll za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10008 + +smlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x09,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54109 
+ +smlsll za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x09,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54109 + +smlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96189 + +smlsll za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96189 + +smlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6389 + +smlsll za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10001001 +// CHECK-INST: smlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6389 + +smlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x09,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10209 + +smlsll za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x09,0x02,0xb1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10209 + +smlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0009 + +smlsll za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0009 + +smlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54208 + +smlsll za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00001000 +// CHECK-INST: smlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54208 + +smlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10188 + +smlsll za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10001000 +// CHECK-INST: smlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10188 + +smlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - 
z27.b } +// CHECK-ENCODING: [0x09,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94009 + +smlsll za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00001001 +// CHECK-INST: smlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x09,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94009 + +smlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0289 + +smlsll za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10001001 +// CHECK-INST: smlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x89,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0289 + +smlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16108 + +smlsll za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00001000 +// CHECK-INST: smlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16108 + +smlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10001001 +// CHECK-INST: smlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92189 + +smlsll za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10001001 +// CHECK-INST: smlsll 
za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x89,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92189 + + +smlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x08,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700008 + +smlsll za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754149 + +smlsll za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x49,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754149 + +smlsll za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a9 + +smlsll za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xa9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861a9 + +smlsll za.d[w11, 4:7, vgx4], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// 
CHECK-ENCODING: [0xe9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e9 + +smlsll za.d[w11, 4:7], {z31.h, z0.h, z1.h, z2.h}, z15.h // 11000001-01111111-01100011-11101001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xe9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63e9 + +smlsll za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700229 + +smlsll za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x29,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700229 + +smlsll za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0029 + +smlsll za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00101001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x29,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0029 + +smlsll za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744268 + +smlsll za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01101000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x68,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1744268 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x88,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720188 + +smlsll za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4029 + +smlsll za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00101001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x29,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4029 + +smlsll za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c9 + +smlsll za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xc9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02c9 + +smlsll za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716128 + +smlsll za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 
11000001-01110001-01100001-00101000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x28,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716128 + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2189 + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x89,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2189 + + +smlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908008 + +smlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x0d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c50d + +smlsll za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00001101 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x0d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c50d + +smlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, 
vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e58f + +smlsll za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x8f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e58f + +smlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x8f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe78f + +smlsll za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10001111 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x8f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe78f + +smlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190860d + +smlsll za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x0d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190860d + +smlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x09,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8409 + +smlsll za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: 
[0x09,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8409 + +smlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x08,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c608 + +smlsll za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x08,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c608 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928188 + +smlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac009 + +smlsll za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac009 + +smlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c19e828d + +smlsll za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10001101 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x8d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e828d + +smlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e50a + +smlsll za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00001010 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x0a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e50a + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba18f + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10001111 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x8f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba18f + + +smlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10008 + +smlsll za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1e10008 + +smlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x09,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54109 + +smlsll za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x09,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54109 + +smlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96189 + +smlsll za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96189 + +smlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6389 + +smlsll za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10001001 +// CHECK-INST: smlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6389 + +smlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: 
[0x09,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10209 + +smlsll za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x09,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10209 + +smlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0009 + +smlsll za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x09,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0009 + +smlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54208 + +smlsll za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00001000 +// CHECK-INST: smlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x08,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54208 + +smlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10188 + +smlsll za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10001000 +// CHECK-INST: smlsll za.d[w8, 0:3, vgx4], { 
z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x88,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10188 + +smlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94009 + +smlsll za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00001001 +// CHECK-INST: smlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x09,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94009 + +smlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0289 + +smlsll za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10001001 +// CHECK-INST: smlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x89,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0289 + +smlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16108 + +smlsll za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00001000 +// CHECK-INST: smlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x08,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16108 + +smlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 
11000001-11101001-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92189 + +smlsll za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10001001 +// CHECK-INST: smlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x89,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92189 + diff --git a/llvm/test/MC/AArch64/SME2/smopa-diagnostics.s b/llvm/test/MC/AArch64/SME2/smopa-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smopa-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +smopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +smopa za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smopa za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: smopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid suffixes + +smopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: smopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +smopa za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smopa.s b/llvm/test/MC/AArch64/SME2/smopa.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smopa.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smopa za0.s, p0/m, p0/m, z0.h, z0.h // 10100000-10000000-00000000-00001000 +// CHECK-INST: smopa za0.s, p0/m, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x08,0x00,0x80,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0800008 + +smopa za1.s, p5/m, p2/m, z10.h, z21.h // 10100000-10010101-01010101-01001001 +// CHECK-INST: smopa za1.s, p5/m, p2/m, z10.h, z21.h +// CHECK-ENCODING: [0x49,0x55,0x95,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0955549 + +smopa za3.s, p3/m, p7/m, z13.h, z8.h // 10100000-10001000-11101101-10101011 +// CHECK-INST: smopa za3.s, p3/m, p7/m, z13.h, z8.h +// CHECK-ENCODING: [0xab,0xed,0x88,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a088edab + +smopa za3.s, p7/m, p7/m, z31.h, z31.h // 
10100000-10011111-11111111-11101011 +// CHECK-INST: smopa za3.s, p7/m, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xeb,0xff,0x9f,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09fffeb + +smopa za1.s, p3/m, p0/m, z17.h, z16.h // 10100000-10010000-00001110-00101001 +// CHECK-INST: smopa za1.s, p3/m, p0/m, z17.h, z16.h +// CHECK-ENCODING: [0x29,0x0e,0x90,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0900e29 + +smopa za1.s, p1/m, p4/m, z1.h, z30.h // 10100000-10011110-10000100-00101001 +// CHECK-INST: smopa za1.s, p1/m, p4/m, z1.h, z30.h +// CHECK-ENCODING: [0x29,0x84,0x9e,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09e8429 + +smopa za0.s, p5/m, p2/m, z19.h, z20.h // 10100000-10010100-01010110-01101000 +// CHECK-INST: smopa za0.s, p5/m, p2/m, z19.h, z20.h +// CHECK-ENCODING: [0x68,0x56,0x94,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0945668 + +smopa za0.s, p6/m, p0/m, z12.h, z2.h // 10100000-10000010-00011001-10001000 +// CHECK-INST: smopa za0.s, p6/m, p0/m, z12.h, z2.h +// CHECK-ENCODING: [0x88,0x19,0x82,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0821988 + +smopa za1.s, p2/m, p6/m, z1.h, z26.h // 10100000-10011010-11001000-00101001 +// CHECK-INST: smopa za1.s, p2/m, p6/m, z1.h, z26.h +// CHECK-ENCODING: [0x29,0xc8,0x9a,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09ac829 + +smopa za1.s, p2/m, p0/m, z22.h, z30.h // 10100000-10011110-00001010-11001001 +// CHECK-INST: smopa za1.s, p2/m, p0/m, z22.h, z30.h +// CHECK-ENCODING: [0xc9,0x0a,0x9e,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09e0ac9 + +smopa za2.s, p5/m, p7/m, z9.h, z1.h // 10100000-10000001-11110101-00101010 +// CHECK-INST: smopa za2.s, p5/m, p7/m, z9.h, z1.h +// CHECK-ENCODING: [0x2a,0xf5,0x81,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a081f52a + +smopa za3.s, p2/m, p5/m, z12.h, z11.h // 
10100000-10001011-10101001-10001011 +// CHECK-INST: smopa za3.s, p2/m, p5/m, z12.h, z11.h +// CHECK-ENCODING: [0x8b,0xa9,0x8b,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a08ba98b + diff --git a/llvm/test/MC/AArch64/SME2/smops-diagnostics.s b/llvm/test/MC/AArch64/SME2/smops-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/smops-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +smops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +smops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: smops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: smops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid suffixes + +smops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: smops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +smops za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/smops.s b/llvm/test/MC/AArch64/SME2/smops.s new file mode 100644 --- /dev/null 
+++ b/llvm/test/MC/AArch64/SME2/smops.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +smops za0.s, p0/m, p0/m, z0.h, z0.h // 10100000-10000000-00000000-00011000 +// CHECK-INST: smops za0.s, p0/m, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x18,0x00,0x80,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0800018 + +smops za1.s, p5/m, p2/m, z10.h, z21.h // 10100000-10010101-01010101-01011001 +// CHECK-INST: smops za1.s, p5/m, p2/m, z10.h, z21.h +// CHECK-ENCODING: [0x59,0x55,0x95,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0955559 + +smops za3.s, p3/m, p7/m, z13.h, z8.h // 10100000-10001000-11101101-10111011 +// CHECK-INST: smops za3.s, p3/m, p7/m, z13.h, z8.h +// CHECK-ENCODING: [0xbb,0xed,0x88,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a088edbb + +smops za3.s, p7/m, p7/m, z31.h, z31.h // 10100000-10011111-11111111-11111011 +// CHECK-INST: smops za3.s, p7/m, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xfb,0xff,0x9f,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09ffffb + +smops za1.s, p3/m, p0/m, z17.h, z16.h // 10100000-10010000-00001110-00111001 +// CHECK-INST: smops za1.s, p3/m, p0/m, z17.h, z16.h +// 
CHECK-ENCODING: [0x39,0x0e,0x90,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0900e39 + +smops za1.s, p1/m, p4/m, z1.h, z30.h // 10100000-10011110-10000100-00111001 +// CHECK-INST: smops za1.s, p1/m, p4/m, z1.h, z30.h +// CHECK-ENCODING: [0x39,0x84,0x9e,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09e8439 + +smops za0.s, p5/m, p2/m, z19.h, z20.h // 10100000-10010100-01010110-01111000 +// CHECK-INST: smops za0.s, p5/m, p2/m, z19.h, z20.h +// CHECK-ENCODING: [0x78,0x56,0x94,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0945678 + +smops za0.s, p6/m, p0/m, z12.h, z2.h // 10100000-10000010-00011001-10011000 +// CHECK-INST: smops za0.s, p6/m, p0/m, z12.h, z2.h +// CHECK-ENCODING: [0x98,0x19,0x82,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a0821998 + +smops za1.s, p2/m, p6/m, z1.h, z26.h // 10100000-10011010-11001000-00111001 +// CHECK-INST: smops za1.s, p2/m, p6/m, z1.h, z26.h +// CHECK-ENCODING: [0x39,0xc8,0x9a,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09ac839 + +smops za1.s, p2/m, p0/m, z22.h, z30.h // 10100000-10011110-00001010-11011001 +// CHECK-INST: smops za1.s, p2/m, p0/m, z22.h, z30.h +// CHECK-ENCODING: [0xd9,0x0a,0x9e,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a09e0ad9 + +smops za2.s, p5/m, p7/m, z9.h, z1.h // 10100000-10000001-11110101-00111010 +// CHECK-INST: smops za2.s, p5/m, p7/m, z9.h, z1.h +// CHECK-ENCODING: [0x3a,0xf5,0x81,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a081f53a + +smops za3.s, p2/m, p5/m, z12.h, z11.h // 10100000-10001011-10101001-10011011 +// CHECK-INST: smops za3.s, p2/m, p5/m, z12.h, z11.h +// CHECK-ENCODING: [0x9b,0xa9,0x8b,0xa0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a08ba99b + diff --git a/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s b/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s --- 
a/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s @@ -1,4 +1,4 @@ -// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 2>&1 < %s | FileCheck %s +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s // --------------------------------------------------------------------------// // Invalid vector list @@ -73,7 +73,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: sqdmulh {z0.s,z1.s}, {z0.s,z2.s}, z15.s -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: sqdmulh {z0.s,z1.s}, {z0.s,z2.s}, z15.s // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s b/llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s-e copy from llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s copy to llvm/test/MC/AArch64/SME2/sqdmulh-diagnostics.s-e diff --git a/llvm/test/MC/AArch64/SME2/sqdmulh.s b/llvm/test/MC/AArch64/SME2/sqdmulh.s --- a/llvm/test/MC/AArch64/SME2/sqdmulh.s +++ b/llvm/test/MC/AArch64/SME2/sqdmulh.s @@ -62,6 +62,31 @@ // CHECK-UNKNOWN: c17eb41e +sqdmulh {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110100-00000000 +// CHECK-INST: sqdmulh { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xb4,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b400 + +sqdmulh {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110100-00010100 +// CHECK-INST: sqdmulh { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x14,0xb4,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b414 + +sqdmulh {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110100-00010110 +// CHECK-INST: sqdmulh { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x16,0xb4,0x68,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c168b416 + +sqdmulh {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110100-00011110 +// CHECK-INST: sqdmulh { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x1e,0xb4,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb41e + + sqdmulh {z0.s - z1.s}, {z0.s - z1.s}, z0.s // 11000001-10100000-10100100-00000000 // CHECK-INST: sqdmulh { z0.s, z1.s }, { z0.s, z1.s }, z0.s // CHECK-ENCODING: [0x00,0xa4,0xa0,0xc1] @@ -112,6 +137,31 @@ // CHECK-UNKNOWN: c1beb41e +sqdmulh {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110100-00000000 +// CHECK-INST: sqdmulh { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xb4,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b400 + +sqdmulh {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110100-00010100 +// CHECK-INST: sqdmulh { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x14,0xb4,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b414 + +sqdmulh {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110100-00010110 +// CHECK-INST: sqdmulh { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x16,0xb4,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b416 + +sqdmulh {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110100-00011110 +// CHECK-INST: sqdmulh { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x1e,0xb4,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb41e + + sqdmulh {z0.d - z1.d}, {z0.d - z1.d}, z0.d // 11000001-11100000-10100100-00000000 // CHECK-INST: sqdmulh { z0.d, z1.d }, { z0.d, z1.d }, z0.d // CHECK-ENCODING: [0x00,0xa4,0xe0,0xc1] @@ -162,6 +212,31 @@ // CHECK-UNKNOWN: c1feb41e +sqdmulh {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 
11000001-11100000-10110100-00000000 +// CHECK-INST: sqdmulh { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x00,0xb4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b400 + +sqdmulh {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110100-00010100 +// CHECK-INST: sqdmulh { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x14,0xb4,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b414 + +sqdmulh {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110100-00010110 +// CHECK-INST: sqdmulh { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x16,0xb4,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b416 + +sqdmulh {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110100-00011110 +// CHECK-INST: sqdmulh { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x1e,0xb4,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb41e + + sqdmulh {z0.b - z1.b}, {z0.b - z1.b}, z0.b // 11000001-00100000-10100100-00000000 // CHECK-INST: sqdmulh { z0.b, z1.b }, { z0.b, z1.b }, z0.b // CHECK-ENCODING: [0x00,0xa4,0x20,0xc1] @@ -186,6 +261,30 @@ // CHECK-ERROR: instruction requires: sme2 // CHECK-UNKNOWN: c12fa41e +sqdmulh {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110100-00000000 +// CHECK-INST: sqdmulh { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0xb4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b400 + +sqdmulh {z20.b - z21.b}, {z20.b - z21.b}, {z20.b - z21.b} // 11000001-00110100-10110100-00010100 +// CHECK-INST: sqdmulh { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x14,0xb4,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b414 + +sqdmulh {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 
11000001-00101000-10110100-00010110 +// CHECK-INST: sqdmulh { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x16,0xb4,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b416 + +sqdmulh {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110100-00011110 +// CHECK-INST: sqdmulh { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x1e,0xb4,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb41e + sqdmulh {z0.b - z1.b}, {z0.b - z1.b}, {z0.b - z1.b} // 11000001-00100000-10110100-00000000 // CHECK-INST: sqdmulh { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } @@ -262,6 +361,31 @@ // CHECK-UNKNOWN: c17cbc1c +sqdmulh {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111100-00000000 +// CHECK-INST: sqdmulh { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xbc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160bc00 + +sqdmulh {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111100-00010100 +// CHECK-INST: sqdmulh { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x14,0xbc,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174bc14 + +sqdmulh {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111100-00010100 +// CHECK-INST: sqdmulh { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xbc,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168bc14 + +sqdmulh {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111100-00011100 +// CHECK-INST: sqdmulh { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x1c,0xbc,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cbc1c + + sqdmulh {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101100-00000000 // CHECK-INST: sqdmulh { z0.s 
- z3.s }, { z0.s - z3.s }, z0.s // CHECK-ENCODING: [0x00,0xac,0xa0,0xc1] @@ -286,6 +410,30 @@ // CHECK-ERROR: instruction requires: sme2 // CHECK-UNKNOWN: c1afac1c +sqdmulh {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111100-00000000 +// CHECK-INST: sqdmulh { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xbc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0bc00 + +sqdmulh {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111100-00010100 +// CHECK-INST: sqdmulh { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x14,0xbc,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4bc14 + +sqdmulh {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111100-00010100 +// CHECK-INST: sqdmulh { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xbc,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8bc14 + +sqdmulh {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111100-00011100 +// CHECK-INST: sqdmulh { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x1c,0xbc,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcbc1c + sqdmulh {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111100-00000000 // CHECK-INST: sqdmulh { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } @@ -362,6 +510,29 @@ // CHECK-UNKNOWN: c1fcbc1c +sqdmulh {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111100-00000000 +// CHECK-INST: sqdmulh { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xbc,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0bc00 + +sqdmulh {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111100-00010100 +// CHECK-INST: sqdmulh { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } 
+// CHECK-ENCODING: [0x14,0xbc,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4bc14 + +sqdmulh {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111100-00010100 +// CHECK-INST: sqdmulh { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xbc,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8bc14 + +sqdmulh {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111100-00011100 +// CHECK-INST: sqdmulh { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x1c,0xbc,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcbc1c sqdmulh {z0.b - z3.b}, {z0.b - z3.b}, z0.b // 11000001-00100000-10101100-00000000 // CHECK-INST: sqdmulh { z0.b - z3.b }, { z0.b - z3.b }, z0.b // CHECK-ENCODING: [0x00,0xac,0x20,0xc1] diff --git a/llvm/test/MC/AArch64/SME2/sqrshr-diagnostics.s b/llvm/test/MC/AArch64/SME2/sqrshr-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshr-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sqrshr z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sqrshr z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z0.h, {z10.s-z12.s}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sqrshr z0.h, {z10.s-z12.s}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: sqrshr z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z0.h, 
{z1.s-z2.s}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: sqrshr z0.h, {z1.s-z2.s}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate + +sqrshr z31.h, {z28.d-z31.d}, #65 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. +// CHECK-NEXT: sqrshr z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z31.h, {z28.s-z29.s}, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]. +// CHECK-NEXT: sqrshr z31.h, {z28.s-z29.s}, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z31.b, {z28.s-z31.s}, #33 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]. +// CHECK-NEXT: sqrshr z31.b, {z28.s-z31.s}, #33 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sqrshr z23.s, {z12.s-z15.s}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqrshr z23.s, {z12.s-z15.s}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshr z23.b, {z12.d-z15.d}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sqrshr z23.b, {z12.d-z15.d}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sqrshr.s b/llvm/test/MC/AArch64/SME2/sqrshr.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshr.s @@ -0,0 +1,88 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sqrshr z0.h, {z0.s - z1.s}, #16 // 11000001-11100000-11010100-00000000 +// CHECK-INST: sqrshr z0.h, { z0.s, z1.s }, #16 +// CHECK-ENCODING: [0x00,0xd4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d400 + +sqrshr z21.h, {z10.s - z11.s}, #11 // 11000001-11100101-11010101-01010101 +// CHECK-INST: sqrshr z21.h, { z10.s, z11.s }, #11 +// CHECK-ENCODING: [0x55,0xd5,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5d555 + +sqrshr z23.h, {z12.s - z13.s}, #8 // 11000001-11101000-11010101-10010111 +// CHECK-INST: sqrshr z23.h, { z12.s, z13.s }, #8 +// CHECK-ENCODING: [0x97,0xd5,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d597 + +sqrshr z31.h, {z30.s - z31.s}, #1 // 11000001-11101111-11010111-11011111 +// CHECK-INST: sqrshr z31.h, { z30.s, z31.s }, #1 +// CHECK-ENCODING: [0xdf,0xd7,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efd7df + + +sqrshr z0.b, {z0.s - z3.s}, #32 // 11000001-01100000-11011000-00000000 +// CHECK-INST: sqrshr z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x00,0xd8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d800 + +sqrshr z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011001-00010101 +// CHECK-INST: sqrshr z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x15,0xd9,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d915 + +sqrshr z23.b, {z12.s - z15.s}, #24 // 
11000001-01101000-11011001-10010111 +// CHECK-INST: sqrshr z23.b, { z12.s - z15.s }, #24 +// CHECK-ENCODING: [0x97,0xd9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d997 + +sqrshr z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011011-10011111 +// CHECK-INST: sqrshr z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0x9f,0xdb,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fdb9f + + +sqrshr z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011000-00000000 +// CHECK-INST: sqrshr z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x00,0xd8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d800 + +sqrshr z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011001-00010101 +// CHECK-INST: sqrshr z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x15,0xd9,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d915 + +sqrshr z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011001-10010111 +// CHECK-INST: sqrshr z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0x97,0xd9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d997 + +sqrshr z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011011-10011111 +// CHECK-INST: sqrshr z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0x9f,0xdb,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdb9f + diff --git a/llvm/test/MC/AArch64/SME2/sqrshrn-diagnostics.s b/llvm/test/MC/AArch64/SME2/sqrshrn-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshrn-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sqrshrn z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sqrshrn z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +sqrshrn z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: sqrshrn z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate + +sqrshrn z31.h, {z28.d-z31.d}, #65 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. +// CHECK-NEXT: sqrshrn z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sqrshrn z23.s, {z12.s-z15.s}, #24 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqrshrn z23.s, {z12.s-z15.s}, #24 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sqrshrn.s b/llvm/test/MC/AArch64/SME2/sqrshrn.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshrn.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sqrshrn z0.b, {z0.s - z3.s}, #32 // 
11000001-01100000-11011100-00000000 +// CHECK-INST: sqrshrn z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x00,0xdc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160dc00 + +sqrshrn z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011101-00010101 +// CHECK-INST: sqrshrn z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x15,0xdd,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175dd15 + +sqrshrn z23.b, {z12.s - z15.s}, #24 // 11000001-01101000-11011101-10010111 +// CHECK-INST: sqrshrn z23.b, { z12.s - z15.s }, #24 +// CHECK-ENCODING: [0x97,0xdd,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168dd97 + +sqrshrn z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011111-10011111 +// CHECK-INST: sqrshrn z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0x9f,0xdf,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fdf9f + + +sqrshrn z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011100-00000000 +// CHECK-INST: sqrshrn z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x00,0xdc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0dc00 + +sqrshrn z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011101-00010101 +// CHECK-INST: sqrshrn z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x15,0xdd,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5dd15 + +sqrshrn z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011101-10010111 +// CHECK-INST: sqrshrn z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0x97,0xdd,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8dd97 + +sqrshrn z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011111-10011111 +// CHECK-INST: sqrshrn z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0x9f,0xdf,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdf9f + diff --git a/llvm/test/MC/AArch64/SME2/sqrshru-diagnostics.s 
b/llvm/test/MC/AArch64/SME2/sqrshru-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshru-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sqrshru z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sqrshru z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshru z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: sqrshru z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate + +sqrshru z31.h, {z28.d-z31.d}, #65 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. +// CHECK-NEXT: sqrshru z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshru z31.h, {z28.d-z31.d}, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. 
+// CHECK-NEXT: sqrshru z31.h, {z28.d-z31.d}, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sqrshru z23.s, {z12.s-z15.s}, #24 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqrshru z23.s, {z12.s-z15.s}, #24 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sqrshru.s b/llvm/test/MC/AArch64/SME2/sqrshru.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshru.s @@ -0,0 +1,87 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sqrshru z0.h, {z0.s - z1.s}, #16 // 11000001-11110000-11010100-00000000 +// CHECK-INST: sqrshru z0.h, { z0.s, z1.s }, #16 +// CHECK-ENCODING: [0x00,0xd4,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f0d400 + +sqrshru z21.h, {z10.s - z11.s}, #11 // 11000001-11110101-11010101-01010101 +// CHECK-INST: sqrshru z21.h, { z10.s, z11.s }, #11 +// CHECK-ENCODING: [0x55,0xd5,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d555 + +sqrshru z23.h, {z12.s - z13.s}, #8 // 11000001-11111000-11010101-10010111 +// CHECK-INST: sqrshru z23.h, { z12.s, z13.s }, #8 +// 
CHECK-ENCODING: [0x97,0xd5,0xf8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f8d597 + +sqrshru z31.h, {z30.s - z31.s}, #1 // 11000001-11111111-11010111-11011111 +// CHECK-INST: sqrshru z31.h, { z30.s, z31.s }, #1 +// CHECK-ENCODING: [0xdf,0xd7,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd7df + + +sqrshru z0.b, {z0.s - z3.s}, #32 // 11000001-01100000-11011000-01000000 +// CHECK-INST: sqrshru z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x40,0xd8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d840 + +sqrshru z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011001-01010101 +// CHECK-INST: sqrshru z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x55,0xd9,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d955 + +sqrshru z23.b, {z12.s - z15.s}, #24 // 11000001-01101000-11011001-11010111 +// CHECK-INST: sqrshru z23.b, { z12.s - z15.s }, #24 +// CHECK-ENCODING: [0xd7,0xd9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d9d7 + +sqrshru z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011011-11011111 +// CHECK-INST: sqrshru z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0xdf,0xdb,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fdbdf + + +sqrshru z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011000-01000000 +// CHECK-INST: sqrshru z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x40,0xd8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d840 + +sqrshru z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011001-01010101 +// CHECK-INST: sqrshru z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x55,0xd9,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d955 + +sqrshru z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011001-11010111 +// CHECK-INST: sqrshru z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0xd7,0xd9,0xe8,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d9d7 + +sqrshru z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011011-11011111 +// CHECK-INST: sqrshru z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0xdf,0xdb,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdbdf diff --git a/llvm/test/MC/AArch64/SME2/sqrshrun-diagnostics.s b/llvm/test/MC/AArch64/SME2/sqrshrun-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshrun-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sqrshrun z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sqrshrun z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sqrshrun z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: sqrshrun z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate + +sqrshrun z31.h, {z28.d-z31.d}, #65 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. 
+// CHECK-NEXT: sqrshrun z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sqrshrun z23.s, {z12.s-z15.s}, #24 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: sqrshrun z23.s, {z12.s-z15.s}, #24 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sqrshrun.s b/llvm/test/MC/AArch64/SME2/sqrshrun.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sqrshrun.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sqrshrun z0.b, {z0.s - z3.s}, #32 // 11000001-01100000-11011100-01000000 +// CHECK-INST: sqrshrun z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x40,0xdc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160dc40 + +sqrshrun z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011101-01010101 +// CHECK-INST: sqrshrun z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x55,0xdd,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175dd55 + +sqrshrun z23.b, {z12.s - z15.s}, #24 // 11000001-01101000-11011101-11010111 +// CHECK-INST: sqrshrun z23.b, { z12.s - 
z15.s }, #24 +// CHECK-ENCODING: [0xd7,0xdd,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168ddd7 + +sqrshrun z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011111-11011111 +// CHECK-INST: sqrshrun z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0xdf,0xdf,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fdfdf + + +sqrshrun z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011100-01000000 +// CHECK-INST: sqrshrun z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x40,0xdc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0dc40 + +sqrshrun z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011101-01010101 +// CHECK-INST: sqrshrun z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x55,0xdd,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5dd55 + +sqrshrun z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011101-11010111 +// CHECK-INST: sqrshrun z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0xd7,0xdd,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8ddd7 + +sqrshrun z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011111-11011111 +// CHECK-INST: sqrshrun z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0xdf,0xdf,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdfdf + diff --git a/llvm/test/MC/AArch64/SME2/srshl-diagnostics.s b/llvm/test/MC/AArch64/SME2/srshl-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/srshl-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +srshl {z0.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: srshl {z0.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +srshl {z0.s-z1.s}, 
{z2.s-z4.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: srshl {z0.s-z1.s}, {z2.s-z4.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +srshl {z20.d-z23.d}, {z20.d-z23.d}, {z8.d-z12.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: srshl {z20.d-z23.d}, {z20.d-z23.d}, {z8.d-z12.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +srshl {z29.b-z30.b}, {z30.b-z31.b}, z15.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: srshl {z29.b-z30.b}, {z30.b-z31.b}, z15.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +srshl {z20.h-z23.h}, {z21.h-z24.h}, {z8.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: srshl {z20.h-z23.h}, {z21.h-z24.h}, {z8.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +srshl {z28.b-z31.b}, {z28.b-z31.b}, {z27.b-z30.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: srshl {z28.b-z31.b}, {z28.b-z31.b}, {z27.b-z30.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Single Register + +srshl {z20.h-z21.h}, {z20.h-z21.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: srshl {z20.h-z21.h}, {z20.h-z21.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +srshl {z0.d-z3.d}, {z0.d-z3.d}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, 
expected z0.d..z15.d +// CHECK-NEXT: srshl {z0.d-z3.d}, {z0.d-z3.d}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/srshl.s b/llvm/test/MC/AArch64/SME2/srshl.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/srshl.s @@ -0,0 +1,413 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +srshl {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100010-00100000 +// CHECK-INST: srshl { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x20,0xa2,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a220 + +srshl {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100010-00110100 +// CHECK-INST: srshl { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x34,0xa2,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a234 + +srshl {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100010-00110110 +// CHECK-INST: srshl { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x36,0xa2,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a236 + +srshl {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100010-00111110 +// CHECK-INST: srshl { z30.h, z31.h 
}, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3e,0xa2,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa23e + + +srshl {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110010-00100000 +// CHECK-INST: srshl { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x20,0xb2,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b220 + +srshl {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110010-00110100 +// CHECK-INST: srshl { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x34,0xb2,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b234 + +srshl {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110010-00110110 +// CHECK-INST: srshl { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x36,0xb2,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b236 + +srshl {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110010-00111110 +// CHECK-INST: srshl { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3e,0xb2,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb23e + + +srshl {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100010-00100000 +// CHECK-INST: srshl { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x20,0xa2,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a220 + +srshl {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100010-00110100 +// CHECK-INST: srshl { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x34,0xa2,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a234 + +srshl {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100010-00110110 +// CHECK-INST: srshl { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x36,0xa2,0xa8,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1a8a236 + +srshl {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100010-00111110 +// CHECK-INST: srshl { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3e,0xa2,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa23e + + +srshl {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110010-00100000 +// CHECK-INST: srshl { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x20,0xb2,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b220 + +srshl {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110010-00110100 +// CHECK-INST: srshl { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x34,0xb2,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b234 + +srshl {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110010-00110110 +// CHECK-INST: srshl { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x36,0xb2,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b236 + +srshl {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110010-00111110 +// CHECK-INST: srshl { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3e,0xb2,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb23e + + +srshl {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100010-00100000 +// CHECK-INST: srshl { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x20,0xa2,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a220 + +srshl {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100010-00110100 +// CHECK-INST: srshl { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x34,0xa2,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a234 + +srshl {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 
11000001-11101000-10100010-00110110 +// CHECK-INST: srshl { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x36,0xa2,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a236 + +srshl {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100010-00111110 +// CHECK-INST: srshl { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x3e,0xa2,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa23e + + +srshl {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110010-00100000 +// CHECK-INST: srshl { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x20,0xb2,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b220 + +srshl {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110010-00110100 +// CHECK-INST: srshl { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x34,0xb2,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b234 + +srshl {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110010-00110110 +// CHECK-INST: srshl { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x36,0xb2,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b236 + +srshl {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110010-00111110 +// CHECK-INST: srshl { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3e,0xb2,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb23e + + +srshl {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100010-00100000 +// CHECK-INST: srshl { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x20,0xa2,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a220 + +srshl {z20.b, z21.b}, {z20.b, z21.b}, z5.b // 11000001-00100101-10100010-00110100 +// CHECK-INST: srshl { z20.b, z21.b }, { z20.b, z21.b }, 
z5.b +// CHECK-ENCODING: [0x34,0xa2,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a234 + +srshl {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100010-00110110 +// CHECK-INST: srshl { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x36,0xa2,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a236 + +srshl {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100010-00111110 +// CHECK-INST: srshl { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x3e,0xa2,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa23e + + +srshl {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110010-00100000 +// CHECK-INST: srshl { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x20,0xb2,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b220 + +srshl {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110010-00110100 +// CHECK-INST: srshl { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x34,0xb2,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b234 + +srshl {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110010-00110110 +// CHECK-INST: srshl { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x36,0xb2,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b236 + +srshl {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110010-00111110 +// CHECK-INST: srshl { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x3e,0xb2,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb23e + + +srshl {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101010-00100000 +// CHECK-INST: srshl { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x20,0xaa,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c160aa20 + +srshl {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101010-00110100 +// CHECK-INST: srshl { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x34,0xaa,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165aa34 + +srshl {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101010-00110100 +// CHECK-INST: srshl { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x34,0xaa,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168aa34 + +srshl {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101010-00111100 +// CHECK-INST: srshl { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3c,0xaa,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16faa3c + + +srshl {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111010-00100000 +// CHECK-INST: srshl { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x20,0xba,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160ba20 + +srshl {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111010-00110100 +// CHECK-INST: srshl { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x34,0xba,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174ba34 + +srshl {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111010-00110100 +// CHECK-INST: srshl { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x34,0xba,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168ba34 + +srshl {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111010-00111100 +// CHECK-INST: srshl { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x3c,0xba,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cba3c + + +srshl {z0.s - 
z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101010-00100000 +// CHECK-INST: srshl { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x20,0xaa,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0aa20 + +srshl {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101010-00110100 +// CHECK-INST: srshl { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x34,0xaa,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5aa34 + +srshl {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101010-00110100 +// CHECK-INST: srshl { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x34,0xaa,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8aa34 + +srshl {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101010-00111100 +// CHECK-INST: srshl { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3c,0xaa,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afaa3c + + +srshl {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111010-00100000 +// CHECK-INST: srshl { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x20,0xba,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0ba20 + +srshl {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111010-00110100 +// CHECK-INST: srshl { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x34,0xba,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4ba34 + +srshl {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111010-00110100 +// CHECK-INST: srshl { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x34,0xba,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8ba34 + +srshl {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111010-00111100 
+// CHECK-INST: srshl { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3c,0xba,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcba3c + + +srshl {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101010-00100000 +// CHECK-INST: srshl { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x20,0xaa,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0aa20 + +srshl {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101010-00110100 +// CHECK-INST: srshl { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x34,0xaa,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5aa34 + +srshl {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101010-00110100 +// CHECK-INST: srshl { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x34,0xaa,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8aa34 + +srshl {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101010-00111100 +// CHECK-INST: srshl { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3c,0xaa,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efaa3c + + +srshl {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111010-00100000 +// CHECK-INST: srshl { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x20,0xba,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0ba20 + +srshl {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111010-00110100 +// CHECK-INST: srshl { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x34,0xba,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4ba34 + +srshl {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111010-00110100 +// CHECK-INST: srshl { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// 
CHECK-ENCODING: [0x34,0xba,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8ba34 + +srshl {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111010-00111100 +// CHECK-INST: srshl { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3c,0xba,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcba3c + + +srshl {z0.b - z3.b}, {z0.b - z3.b}, z0.b // 11000001-00100000-10101010-00100000 +// CHECK-INST: srshl { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x20,0xaa,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120aa20 + +srshl {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101010-00110100 +// CHECK-INST: srshl { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x34,0xaa,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125aa34 + +srshl {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101010-00110100 +// CHECK-INST: srshl { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x34,0xaa,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128aa34 + +srshl {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101010-00111100 +// CHECK-INST: srshl { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x3c,0xaa,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12faa3c + + +srshl {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111010-00100000 +// CHECK-INST: srshl { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x20,0xba,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120ba20 + +srshl {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111010-00110100 +// CHECK-INST: srshl { z20.b - z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x34,0xba,0x34,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c134ba34 + +srshl {z20.b - z23.b}, {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00101000-10111010-00110100 +// CHECK-INST: srshl { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x34,0xba,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128ba34 + +srshl {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111010-00111100 +// CHECK-INST: srshl { z28.b - z31.b }, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x3c,0xba,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cba3c + diff --git a/llvm/test/MC/AArch64/SME2/st1b b/llvm/test/MC/AArch64/SME2/st1b new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/st1b @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1b {z0.b, z1.b}, pn8, [x0, x0] // 10100000-00100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z1.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0200000 + +st1b {z20.b, z21.b}, pn13, [x10, x21] // 10100000-00110101-00010101-01010100 +// CHECK-INST: st1b { z20.b, z21.b }, pn13, [x10, x21] +//
CHECK-ENCODING: [0x54,0x15,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0351554 + +st1b {z22.b, z23.b}, pn11, [x13, x8] // 10100000-00101000-00001101-10110110 +// CHECK-INST: st1b { z22.b, z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb6,0x0d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0280db6 + +st1b {z30.b, z31.b}, pn15, [sp, xzr] // 10100000-00111111-00011111-11111110 +// CHECK-INST: st1b { z30.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xfe,0x1f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f1ffe + + +st1b {z0.b, z1.b}, pn8, [x0] // 10100000-01100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z1.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x00,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0600000 + +st1b {z20.b, z21.b}, pn13, [x10, #10, mul vl] // 10100000-01100101-00010101-01010100 +// CHECK-INST: st1b { z20.b, z21.b }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x15,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0651554 + +st1b {z22.b, z23.b}, pn11, [x13, #-16, mul vl] // 10100000-01101000-00001101-10110110 +// CHECK-INST: st1b { z22.b, z23.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x0d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0680db6 + +st1b {z30.b, z31.b}, pn15, [sp, #-2, mul vl] // 10100000-01101111-00011111-11111110 +// CHECK-INST: st1b { z30.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x1f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f1ffe + + +st1b {z0.b, z8.b}, pn8, [x0, x0] // 10100001-00100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z8.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1200000 + +st1b {z21.b,
z29.b}, pn13, [x10, x21] // 10100001-00110101-00010101-01010101 +// CHECK-INST: st1b { z21.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x55,0x15,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1351555 + +st1b {z23.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-00001101-10110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb7,0x0d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1280db7 + +st1b {z23.b, z31.b}, pn15, [sp, xzr] // 10100001-00111111-00011111-11110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xf7,0x1f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f1ff7 + + +st1b {z0.b, z8.b}, pn8, [x0] // 10100001-01100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z8.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x00,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1600000 + +st1b {z21.b, z29.b}, pn13, [x10, #10, mul vl] // 10100001-01100101-00010101-01010101 +// CHECK-INST: st1b { z21.b, z29.b }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x15,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1651555 + +st1b {z23.b, z31.b}, pn11, [x13, #-16, mul vl] // 10100001-01101000-00001101-10110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1680db7 + +st1b {z23.b, z31.b}, pn15, [sp, #-2, mul vl] // 10100001-01101111-00011111-11110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x1f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f1ff7 + + +st1b {z0.b-z3.b}, pn8, [x0, x0] // 10100000-00100000-10000000-00000000 +// CHECK-INST: st1b { z0.b-z3.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// 
CHECK-UNKNOWN: a0208000 + +st1b {z20.b-z23.b}, pn13, [x10, x21] // 10100000-00110101-10010101-01010100 +// CHECK-INST: st1b { z20.b-z23.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x54,0x95,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0359554 + +st1b {z20.b-z23.b}, pn11, [x13, x8] // 10100000-00101000-10001101-10110100 +// CHECK-INST: st1b { z20.b-z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb4,0x8d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0288db4 + +st1b {z28.b-z31.b}, pn15, [sp, xzr] // 10100000-00111111-10011111-11111100 +// CHECK-INST: st1b { z28.b-z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xfc,0x9f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f9ffc + + +st1b {z0.b-z3.b}, pn8, [x0] // 10100000-01100000-10000000-00000000 +// CHECK-INST: st1b { z0.b-z3.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x80,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0608000 + +st1b {z20.b-z23.b}, pn13, [x10, #20, mul vl] // 10100000-01100101-10010101-01010100 +// CHECK-INST: st1b { z20.b-z23.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0x95,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0659554 + +st1b {z20.b-z23.b}, pn11, [x13, #-32, mul vl] // 10100000-01101000-10001101-10110100 +// CHECK-INST: st1b { z20.b-z23.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0x8d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0688db4 + +st1b {z28.b-z31.b}, pn15, [sp, #-4, mul vl] // 10100000-01101111-10011111-11111100 +// CHECK-INST: st1b { z28.b-z31.b }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0x9f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f9ffc + + +st1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0, x0] // 10100001-00100000-10000000-00000000 +// CHECK-INST: st1b { z0.b, z4.b, z8.b, z12.b }, 
pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1208000 + +st1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, x21] // 10100001-00110101-10010101-01010001 +// CHECK-INST: st1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x51,0x95,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1359551 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-10001101-10110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb3,0x8d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1288db3 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, xzr] // 10100001-00111111-10011111-11110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xf3,0x9f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f9ff3 + + +st1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0] // 10100001-01100000-10000000-00000000 +// CHECK-INST: st1b { z0.b, z4.b, z8.b, z12.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x80,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1608000 + +st1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, #20, mul vl] // 10100001-01100101-10010101-01010001 +// CHECK-INST: st1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0x95,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1659551 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, #-32, mul vl] // 10100001-01101000-10001101-10110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0x8d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1688db3 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, #-4, mul vl] // 10100001-01101111-10011111-11110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, #-4, mul vl] +// 
CHECK-ENCODING: [0xf3,0x9f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f9ff3 + diff --git a/llvm/test/MC/AArch64/SME2/st1b.s b/llvm/test/MC/AArch64/SME2/st1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/st1b.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1b {z0.b, z8.b}, pn8, [x0, x0] // 10100001-00100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z8.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1200000 + +st1b {z21.b, z29.b}, pn13, [x10, x21] // 10100001-00110101-00010101-01010101 +// CHECK-INST: st1b { z21.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x55,0x15,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1351555 + +st1b {z23.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-00001101-10110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb7,0x0d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1280db7 + +st1b {z23.b, z31.b}, pn15, [sp, xzr] // 10100001-00111111-00011111-11110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: 
[0xf7,0x1f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f1ff7 + + +st1b {z0.b, z8.b}, pn8, [x0] // 10100001-01100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z8.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x00,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1600000 + +st1b {z21.b, z29.b}, pn13, [x10, #10, mul vl] // 10100001-01100101-00010101-01010101 +// CHECK-INST: st1b { z21.b, z29.b }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x15,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1651555 + +st1b {z23.b, z31.b}, pn11, [x13, #-16, mul vl] // 10100001-01101000-00001101-10110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1680db7 + +st1b {z23.b, z31.b}, pn15, [sp, #-2, mul vl] // 10100001-01101111-00011111-11110111 +// CHECK-INST: st1b { z23.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x1f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f1ff7 + + +st1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0, x0] // 10100001-00100000-10000000-00000000 +// CHECK-INST: st1b { z0.b, z4.b, z8.b, z12.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1208000 + +st1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, x21] // 10100001-00110101-10010101-01010001 +// CHECK-INST: st1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x51,0x95,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1359551 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-10001101-10110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb3,0x8d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1288db3 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, 
xzr] // 10100001-00111111-10011111-11110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xf3,0x9f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f9ff3 + + +st1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0] // 10100001-01100000-10000000-00000000 +// CHECK-INST: st1b { z0.b, z4.b, z8.b, z12.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x80,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1608000 + +st1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, #20, mul vl] // 10100001-01100101-10010101-01010001 +// CHECK-INST: st1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0x95,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1659551 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, #-32, mul vl] // 10100001-01101000-10001101-10110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0x8d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1688db3 + +st1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, #-4, mul vl] // 10100001-01101111-10011111-11110011 +// CHECK-INST: st1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0x9f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f9ff3 + diff --git a/llvm/test/MC/AArch64/SME2/st1d.s b/llvm/test/MC/AArch64/SME2/st1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/st1d.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj 
-mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1d {z0.d, z8.d}, pn8, [x0, x0, lsl #3] // 10100001-00100000-01100000-00000000 +// CHECK-INST: st1d { z0.d, z8.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x60,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1206000 + +st1d {z21.d, z29.d}, pn13, [x10, x21, lsl #3] // 10100001-00110101-01110101-01010101 +// CHECK-INST: st1d { z21.d, z29.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x75,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1357555 + +st1d {z23.d, z31.d}, pn11, [x13, x8, lsl #3] // 10100001-00101000-01101101-10110111 +// CHECK-INST: st1d { z23.d, z31.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x6d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1286db7 + +st1d {z23.d, z31.d}, pn15, [sp, xzr, lsl #3] // 10100001-00111111-01111111-11110111 +// CHECK-INST: st1d { z23.d, z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xf7,0x7f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f7ff7 + + +st1d {z0.d, z8.d}, pn8, [x0] // 10100001-01100000-01100000-00000000 +// CHECK-INST: st1d { z0.d, z8.d }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x60,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1606000 + +st1d {z21.d, z29.d}, pn13, [x10, #10, mul vl] // 10100001-01100101-01110101-01010101 +// CHECK-INST: st1d { z21.d, z29.d }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x75,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1657555 + +st1d {z23.d, z31.d}, pn11, [x13, #-16, mul vl] // 
10100001-01101000-01101101-10110111 +// CHECK-INST: st1d { z23.d, z31.d }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x6d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1686db7 + +st1d {z23.d, z31.d}, pn15, [sp, #-2, mul vl] // 10100001-01101111-01111111-11110111 +// CHECK-INST: st1d { z23.d, z31.d }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x7f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f7ff7 + + +st1d {z0.d, z4.d, z8.d, z12.d}, pn8, [x0, x0, lsl #3] // 10100001-00100000-11100000-00000000 +// CHECK-INST: st1d { z0.d, z4.d, z8.d, z12.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0xe0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120e000 + +st1d {z17.d, z21.d, z25.d, z29.d}, pn13, [x10, x21, lsl #3] // 10100001-00110101-11110101-01010001 +// CHECK-INST: st1d { z17.d, z21.d, z25.d, z29.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x51,0xf5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135f551 + +st1d {z19.d, z23.d, z27.d, z31.d}, pn11, [x13, x8, lsl #3] // 10100001-00101000-11101101-10110011 +// CHECK-INST: st1d { z19.d, z23.d, z27.d, z31.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb3,0xed,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128edb3 + +st1d {z19.d, z23.d, z27.d, z31.d}, pn15, [sp, xzr, lsl #3] // 10100001-00111111-11111111-11110011 +// CHECK-INST: st1d { z19.d, z23.d, z27.d, z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xf3,0xff,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13ffff3 + + +st1d {z0.d, z4.d, z8.d, z12.d}, pn8, [x0] // 10100001-01100000-11100000-00000000 +// CHECK-INST: st1d { z0.d, z4.d, z8.d, z12.d }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160e000 + +st1d {z17.d, z21.d, z25.d, z29.d}, pn13, [x10, #20, mul vl] // 
10100001-01100101-11110101-01010001 +// CHECK-INST: st1d { z17.d, z21.d, z25.d, z29.d }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xf5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165f551 + +st1d {z19.d, z23.d, z27.d, z31.d}, pn11, [x13, #-32, mul vl] // 10100001-01101000-11101101-10110011 +// CHECK-INST: st1d { z19.d, z23.d, z27.d, z31.d }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xed,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168edb3 + +st1d {z19.d, z23.d, z27.d, z31.d}, pn15, [sp, #-4, mul vl] // 10100001-01101111-11111111-11110011 +// CHECK-INST: st1d { z19.d, z23.d, z27.d, z31.d }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xff,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16ffff3 + diff --git a/llvm/test/MC/AArch64/SME2/st1h.s b/llvm/test/MC/AArch64/SME2/st1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/st1h.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1h {z0.h, z8.h}, pn8, [x0, x0, lsl #1] // 10100001-00100000-00100000-00000000 +// CHECK-INST: st1h { z0.h, z8.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0x20,0x20,0xa1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1202000 + +st1h {z21.h, z29.h}, pn13, [x10, x21, lsl #1] // 10100001-00110101-00110101-01010101 +// CHECK-INST: st1h { z21.h, z29.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0x35,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1353555 + +st1h {z23.h, z31.h}, pn11, [x13, x8, lsl #1] // 10100001-00101000-00101101-10110111 +// CHECK-INST: st1h { z23.h, z31.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb7,0x2d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1282db7 + +st1h {z23.h, z31.h}, pn15, [sp, xzr, lsl #1] // 10100001-00111111-00111111-11110111 +// CHECK-INST: st1h { z23.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xf7,0x3f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f3ff7 + + +st1h {z0.h, z8.h}, pn8, [x0] // 10100001-01100000-00100000-00000000 +// CHECK-INST: st1h { z0.h, z8.h }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x20,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1602000 + +st1h {z21.h, z29.h}, pn13, [x10, #10, mul vl] // 10100001-01100101-00110101-01010101 +// CHECK-INST: st1h { z21.h, z29.h }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1653555 + +st1h {z23.h, z31.h}, pn11, [x13, #-16, mul vl] // 10100001-01101000-00101101-10110111 +// CHECK-INST: st1h { z23.h, z31.h }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1682db7 + +st1h {z23.h, z31.h}, pn15, [sp, #-2, mul vl] // 10100001-01101111-00111111-11110111 +// CHECK-INST: st1h { z23.h, z31.h }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x3f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f3ff7 + + +st1h {z0.h, z4.h, z8.h, z12.h}, pn8, [x0, x0, lsl #1] // 10100001-00100000-10100000-00000000 +// 
CHECK-INST: st1h { z0.h, z4.h, z8.h, z12.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0xa0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120a000 + +st1h {z17.h, z21.h, z25.h, z29.h}, pn13, [x10, x21, lsl #1] // 10100001-00110101-10110101-01010001 +// CHECK-INST: st1h { z17.h, z21.h, z25.h, z29.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x51,0xb5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135b551 + +st1h {z19.h, z23.h, z27.h, z31.h}, pn11, [x13, x8, lsl #1] // 10100001-00101000-10101101-10110011 +// CHECK-INST: st1h { z19.h, z23.h, z27.h, z31.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb3,0xad,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128adb3 + +st1h {z19.h, z23.h, z27.h, z31.h}, pn15, [sp, xzr, lsl #1] // 10100001-00111111-10111111-11110011 +// CHECK-INST: st1h { z19.h, z23.h, z27.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xf3,0xbf,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13fbff3 + + +st1h {z0.h, z4.h, z8.h, z12.h}, pn8, [x0] // 10100001-01100000-10100000-00000000 +// CHECK-INST: st1h { z0.h, z4.h, z8.h, z12.h }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xa0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160a000 + +st1h {z17.h, z21.h, z25.h, z29.h}, pn13, [x10, #20, mul vl] // 10100001-01100101-10110101-01010001 +// CHECK-INST: st1h { z17.h, z21.h, z25.h, z29.h }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xb5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165b551 + +st1h {z19.h, z23.h, z27.h, z31.h}, pn11, [x13, #-32, mul vl] // 10100001-01101000-10101101-10110011 +// CHECK-INST: st1h { z19.h, z23.h, z27.h, z31.h }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xad,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168adb3 + +st1h {z19.h, z23.h, z27.h, z31.h}, pn15, [sp, #-4, mul vl] // 
10100001-01101111-10111111-11110011 +// CHECK-INST: st1h { z19.h, z23.h, z27.h, z31.h }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xbf,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16fbff3 + diff --git a/llvm/test/MC/AArch64/SME2/st1w.s b/llvm/test/MC/AArch64/SME2/st1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/st1w.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +st1w {z0.s, z8.s}, pn8, [x0, x0, lsl #2] // 10100001-00100000-01000000-00000000 +// CHECK-INST: st1w { z0.s, z8.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1204000 + +st1w {z21.s, z29.s}, pn13, [x10, x21, lsl #2] // 10100001-00110101-01010101-01010101 +// CHECK-INST: st1w { z21.s, z29.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1355555 + +st1w {z23.s, z31.s}, pn11, [x13, x8, lsl #2] // 10100001-00101000-01001101-10110111 +// CHECK-INST: st1w { z23.s, z31.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1284db7 + 
+st1w {z23.s, z31.s}, pn15, [sp, xzr, lsl #2] // 10100001-00111111-01011111-11110111 +// CHECK-INST: st1w { z23.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xf7,0x5f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f5ff7 + + +st1w {z0.s, z8.s}, pn8, [x0] // 10100001-01100000-01000000-00000000 +// CHECK-INST: st1w { z0.s, z8.s }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x40,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1604000 + +st1w {z21.s, z29.s}, pn13, [x10, #10, mul vl] // 10100001-01100101-01010101-01010101 +// CHECK-INST: st1w { z21.s, z29.s }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x55,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1655555 + +st1w {z23.s, z31.s}, pn11, [x13, #-16, mul vl] // 10100001-01101000-01001101-10110111 +// CHECK-INST: st1w { z23.s, z31.s }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x4d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1684db7 + +st1w {z23.s, z31.s}, pn15, [sp, #-2, mul vl] // 10100001-01101111-01011111-11110111 +// CHECK-INST: st1w { z23.s, z31.s }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xf7,0x5f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f5ff7 + + +st1w {z0.s, z4.s, z8.s, z12.s}, pn8, [x0, x0, lsl #2] // 10100001-00100000-11000000-00000000 +// CHECK-INST: st1w { z0.s, z4.s, z8.s, z12.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0xc0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120c000 + +st1w {z17.s, z21.s, z25.s, z29.s}, pn13, [x10, x21, lsl #2] // 10100001-00110101-11010101-01010001 +// CHECK-INST: st1w { z17.s, z21.s, z25.s, z29.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x51,0xd5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135d551 + +st1w {z19.s, z23.s, z27.s, z31.s}, pn11, [x13, x8, lsl #2] // 10100001-00101000-11001101-10110011 +// CHECK-INST: st1w { 
z19.s, z23.s, z27.s, z31.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb3,0xcd,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128cdb3 + +st1w {z19.s, z23.s, z27.s, z31.s}, pn15, [sp, xzr, lsl #2] // 10100001-00111111-11011111-11110011 +// CHECK-INST: st1w { z19.s, z23.s, z27.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xf3,0xdf,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13fdff3 + + +st1w {z0.s, z4.s, z8.s, z12.s}, pn8, [x0] // 10100001-01100000-11000000-00000000 +// CHECK-INST: st1w { z0.s, z4.s, z8.s, z12.s }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xc0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160c000 + +st1w {z17.s, z21.s, z25.s, z29.s}, pn13, [x10, #20, mul vl] // 10100001-01100101-11010101-01010001 +// CHECK-INST: st1w { z17.s, z21.s, z25.s, z29.s }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x51,0xd5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165d551 + +st1w {z19.s, z23.s, z27.s, z31.s}, pn11, [x13, #-32, mul vl] // 10100001-01101000-11001101-10110011 +// CHECK-INST: st1w { z19.s, z23.s, z27.s, z31.s }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb3,0xcd,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168cdb3 + +st1w {z19.s, z23.s, z27.s, z31.s}, pn15, [sp, #-4, mul vl] // 10100001-01101111-11011111-11110011 +// CHECK-INST: st1w { z19.s, z23.s, z27.s, z31.s }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xf3,0xdf,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16fdff3 + diff --git a/llvm/test/MC/AArch64/SME2/stnt1b.s b/llvm/test/MC/AArch64/SME2/stnt1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/stnt1b.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | 
FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +stnt1b {z0.b, z8.b}, pn8, [x0, x0] // 10100001-00100000-00000000-00001000 +// CHECK-INST: stnt1b { z0.b, z8.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x08,0x00,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1200008 + +stnt1b {z21.b, z29.b}, pn13, [x10, x21] // 10100001-00110101-00010101-01011101 +// CHECK-INST: stnt1b { z21.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x5d,0x15,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135155d + +stnt1b {z23.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-00001101-10111111 +// CHECK-INST: stnt1b { z23.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xbf,0x0d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1280dbf + +stnt1b {z23.b, z31.b}, pn15, [sp, xzr] // 10100001-00111111-00011111-11111111 +// CHECK-INST: stnt1b { z23.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xff,0x1f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f1fff + + +stnt1b {z0.b, z8.b}, pn8, [x0] // 10100001-01100000-00000000-00001000 +// CHECK-INST: stnt1b { z0.b, z8.b }, pn8, [x0] +// CHECK-ENCODING: [0x08,0x00,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1600008 + +stnt1b {z21.b, z29.b}, pn13, [x10, #10, mul vl] // 10100001-01100101-00010101-01011101 +// CHECK-INST: stnt1b { z21.b, z29.b }, pn13, [x10, #10, mul vl] +// 
CHECK-ENCODING: [0x5d,0x15,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165155d + +stnt1b {z23.b, z31.b}, pn11, [x13, #-16, mul vl] // 10100001-01101000-00001101-10111111 +// CHECK-INST: stnt1b { z23.b, z31.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x0d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1680dbf + +stnt1b {z23.b, z31.b}, pn15, [sp, #-2, mul vl] // 10100001-01101111-00011111-11111111 +// CHECK-INST: stnt1b { z23.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x1f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f1fff + + +stnt1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0, x0] // 10100001-00100000-10000000-00001000 +// CHECK-INST: stnt1b { z0.b, z4.b, z8.b, z12.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x08,0x80,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1208008 + +stnt1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, x21] // 10100001-00110101-10010101-01011001 +// CHECK-INST: stnt1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x59,0x95,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1359559 + +stnt1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, x8] // 10100001-00101000-10001101-10111011 +// CHECK-INST: stnt1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xbb,0x8d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1288dbb + +stnt1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, xzr] // 10100001-00111111-10011111-11111011 +// CHECK-INST: stnt1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xfb,0x9f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f9ffb + + +stnt1b {z0.b, z4.b, z8.b, z12.b}, pn8, [x0] // 10100001-01100000-10000000-00001000 +// CHECK-INST: stnt1b { z0.b, z4.b, z8.b, z12.b }, pn8, [x0] +// CHECK-ENCODING: [0x08,0x80,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: a1608008 + +stnt1b {z17.b, z21.b, z25.b, z29.b}, pn13, [x10, #20, mul vl] // 10100001-01100101-10010101-01011001 +// CHECK-INST: stnt1b { z17.b, z21.b, z25.b, z29.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0x95,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1659559 + +stnt1b {z19.b, z23.b, z27.b, z31.b}, pn11, [x13, #-32, mul vl] // 10100001-01101000-10001101-10111011 +// CHECK-INST: stnt1b { z19.b, z23.b, z27.b, z31.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0x8d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1688dbb + +stnt1b {z19.b, z23.b, z27.b, z31.b}, pn15, [sp, #-4, mul vl] // 10100001-01101111-10011111-11111011 +// CHECK-INST: stnt1b { z19.b, z23.b, z27.b, z31.b }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0x9f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f9ffb + diff --git a/llvm/test/MC/AArch64/SME2/stnt1d.s b/llvm/test/MC/AArch64/SME2/stnt1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/stnt1d.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +stnt1d {z0.d, z8.d}, pn8, [x0, x0, lsl #3] // 10100001-00100000-01100000-00001000 
+// CHECK-INST: stnt1d { z0.d, z8.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x08,0x60,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1206008 + +stnt1d {z21.d, z29.d}, pn13, [x10, x21, lsl #3] // 10100001-00110101-01110101-01011101 +// CHECK-INST: stnt1d { z21.d, z29.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x5d,0x75,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135755d + +stnt1d {z23.d, z31.d}, pn11, [x13, x8, lsl #3] // 10100001-00101000-01101101-10111111 +// CHECK-INST: stnt1d { z23.d, z31.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xbf,0x6d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1286dbf + +stnt1d {z23.d, z31.d}, pn15, [sp, xzr, lsl #3] // 10100001-00111111-01111111-11111111 +// CHECK-INST: stnt1d { z23.d, z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xff,0x7f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f7fff + + +stnt1d {z0.d, z8.d}, pn8, [x0] // 10100001-01100000-01100000-00001000 +// CHECK-INST: stnt1d { z0.d, z8.d }, pn8, [x0] +// CHECK-ENCODING: [0x08,0x60,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1606008 + +stnt1d {z21.d, z29.d}, pn13, [x10, #10, mul vl] // 10100001-01100101-01110101-01011101 +// CHECK-INST: stnt1d { z21.d, z29.d }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x75,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165755d + +stnt1d {z23.d, z31.d}, pn11, [x13, #-16, mul vl] // 10100001-01101000-01101101-10111111 +// CHECK-INST: stnt1d { z23.d, z31.d }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x6d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1686dbf + +stnt1d {z23.d, z31.d}, pn15, [sp, #-2, mul vl] // 10100001-01101111-01111111-11111111 +// CHECK-INST: stnt1d { z23.d, z31.d }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x7f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: a16f7fff + + +stnt1d {z0.d, z4.d, z8.d, z12.d}, pn8, [x0, x0, lsl #3] // 10100001-00100000-11100000-00001000 +// CHECK-INST: stnt1d { z0.d, z4.d, z8.d, z12.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x08,0xe0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120e008 + +stnt1d {z17.d, z21.d, z25.d, z29.d}, pn13, [x10, x21, lsl #3] // 10100001-00110101-11110101-01011001 +// CHECK-INST: stnt1d { z17.d, z21.d, z25.d, z29.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x59,0xf5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135f559 + +stnt1d {z19.d, z23.d, z27.d, z31.d}, pn11, [x13, x8, lsl #3] // 10100001-00101000-11101101-10111011 +// CHECK-INST: stnt1d { z19.d, z23.d, z27.d, z31.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xbb,0xed,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128edbb + +stnt1d {z19.d, z23.d, z27.d, z31.d}, pn15, [sp, xzr, lsl #3] // 10100001-00111111-11111111-11111011 +// CHECK-INST: stnt1d { z19.d, z23.d, z27.d, z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfb,0xff,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13ffffb + + +stnt1d {z0.d, z4.d, z8.d, z12.d}, pn8, [x0] // 10100001-01100000-11100000-00001000 +// CHECK-INST: stnt1d { z0.d, z4.d, z8.d, z12.d }, pn8, [x0] +// CHECK-ENCODING: [0x08,0xe0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160e008 + +stnt1d {z17.d, z21.d, z25.d, z29.d}, pn13, [x10, #20, mul vl] // 10100001-01100101-11110101-01011001 +// CHECK-INST: stnt1d { z17.d, z21.d, z25.d, z29.d }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xf5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165f559 + +stnt1d {z19.d, z23.d, z27.d, z31.d}, pn11, [x13, #-32, mul vl] // 10100001-01101000-11101101-10111011 +// CHECK-INST: stnt1d { z19.d, z23.d, z27.d, z31.d }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: 
[0xbb,0xed,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168edbb + +stnt1d {z19.d, z23.d, z27.d, z31.d}, pn15, [sp, #-4, mul vl] // 10100001-01101111-11111111-11111011 +// CHECK-INST: stnt1d { z19.d, z23.d, z27.d, z31.d }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0xff,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16ffffb + diff --git a/llvm/test/MC/AArch64/SME2/stnt1h.s b/llvm/test/MC/AArch64/SME2/stnt1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/stnt1h.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +stnt1h {z0.h, z8.h}, pn8, [x0, x0, lsl #1] // 10100001-00100000-00100000-00001000 +// CHECK-INST: stnt1h { z0.h, z8.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x08,0x20,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1202008 + +stnt1h {z21.h, z29.h}, pn13, [x10, x21, lsl #1] // 10100001-00110101-00110101-01011101 +// CHECK-INST: stnt1h { z21.h, z29.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x5d,0x35,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135355d + +stnt1h {z23.h, z31.h}, pn11, [x13, x8, lsl #1] // 10100001-00101000-00101101-10111111 
+// CHECK-INST: stnt1h { z23.h, z31.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xbf,0x2d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1282dbf + +stnt1h {z23.h, z31.h}, pn15, [sp, xzr, lsl #1] // 10100001-00111111-00111111-11111111 +// CHECK-INST: stnt1h { z23.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xff,0x3f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f3fff + + +stnt1h {z0.h, z8.h}, pn8, [x0] // 10100001-01100000-00100000-00001000 +// CHECK-INST: stnt1h { z0.h, z8.h }, pn8, [x0] +// CHECK-ENCODING: [0x08,0x20,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1602008 + +stnt1h {z21.h, z29.h}, pn13, [x10, #10, mul vl] // 10100001-01100101-00110101-01011101 +// CHECK-INST: stnt1h { z21.h, z29.h }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x35,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165355d + +stnt1h {z23.h, z31.h}, pn11, [x13, #-16, mul vl] // 10100001-01101000-00101101-10111111 +// CHECK-INST: stnt1h { z23.h, z31.h }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x2d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1682dbf + +stnt1h {z23.h, z31.h}, pn15, [sp, #-2, mul vl] // 10100001-01101111-00111111-11111111 +// CHECK-INST: stnt1h { z23.h, z31.h }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f3fff + + +stnt1h {z0.h, z4.h, z8.h, z12.h}, pn8, [x0, x0, lsl #1] // 10100001-00100000-10100000-00001000 +// CHECK-INST: stnt1h { z0.h, z4.h, z8.h, z12.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x08,0xa0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120a008 + +stnt1h {z17.h, z21.h, z25.h, z29.h}, pn13, [x10, x21, lsl #1] // 10100001-00110101-10110101-01011001 +// CHECK-INST: stnt1h { z17.h, z21.h, z25.h, z29.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: 
[0x59,0xb5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135b559 + +stnt1h {z19.h, z23.h, z27.h, z31.h}, pn11, [x13, x8, lsl #1] // 10100001-00101000-10101101-10111011 +// CHECK-INST: stnt1h { z19.h, z23.h, z27.h, z31.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xbb,0xad,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128adbb + +stnt1h {z19.h, z23.h, z27.h, z31.h}, pn15, [sp, xzr, lsl #1] // 10100001-00111111-10111111-11111011 +// CHECK-INST: stnt1h { z19.h, z23.h, z27.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfb,0xbf,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13fbffb + + +stnt1h {z0.h, z4.h, z8.h, z12.h}, pn8, [x0] // 10100001-01100000-10100000-00001000 +// CHECK-INST: stnt1h { z0.h, z4.h, z8.h, z12.h }, pn8, [x0] +// CHECK-ENCODING: [0x08,0xa0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160a008 + +stnt1h {z17.h, z21.h, z25.h, z29.h}, pn13, [x10, #20, mul vl] // 10100001-01100101-10110101-01011001 +// CHECK-INST: stnt1h { z17.h, z21.h, z25.h, z29.h }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xb5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165b559 + +stnt1h {z19.h, z23.h, z27.h, z31.h}, pn11, [x13, #-32, mul vl] // 10100001-01101000-10101101-10111011 +// CHECK-INST: stnt1h { z19.h, z23.h, z27.h, z31.h }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0xad,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168adbb + +stnt1h {z19.h, z23.h, z27.h, z31.h}, pn15, [sp, #-4, mul vl] // 10100001-01101111-10111111-11111011 +// CHECK-INST: stnt1h { z19.h, z23.h, z27.h, z31.h }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0xbf,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16fbffb + diff --git a/llvm/test/MC/AArch64/SME2/stnt1w.s b/llvm/test/MC/AArch64/SME2/stnt1w.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SME2/stnt1w.s @@ -0,0 +1,113 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +stnt1w {z0.s, z8.s}, pn8, [x0, x0, lsl #2] // 10100001-00100000-01000000-00001000 +// CHECK-INST: stnt1w { z0.s, z8.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x08,0x40,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1204008 + +stnt1w {z21.s, z29.s}, pn13, [x10, x21, lsl #2] // 10100001-00110101-01010101-01011101 +// CHECK-INST: stnt1w { z21.s, z29.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x5d,0x55,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135555d + +stnt1w {z23.s, z31.s}, pn11, [x13, x8, lsl #2] // 10100001-00101000-01001101-10111111 +// CHECK-INST: stnt1w { z23.s, z31.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xbf,0x4d,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1284dbf + +stnt1w {z23.s, z31.s}, pn15, [sp, xzr, lsl #2] // 10100001-00111111-01011111-11111111 +// CHECK-INST: stnt1w { z23.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xff,0x5f,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13f5fff + + +stnt1w {z0.s, z8.s}, pn8, [x0] // 
10100001-01100000-01000000-00001000 +// CHECK-INST: stnt1w { z0.s, z8.s }, pn8, [x0] +// CHECK-ENCODING: [0x08,0x40,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1604008 + +stnt1w {z21.s, z29.s}, pn13, [x10, #10, mul vl] // 10100001-01100101-01010101-01011101 +// CHECK-INST: stnt1w { z21.s, z29.s }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x5d,0x55,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165555d + +stnt1w {z23.s, z31.s}, pn11, [x13, #-16, mul vl] // 10100001-01101000-01001101-10111111 +// CHECK-INST: stnt1w { z23.s, z31.s }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xbf,0x4d,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1684dbf + +stnt1w {z23.s, z31.s}, pn15, [sp, #-2, mul vl] // 10100001-01101111-01011111-11111111 +// CHECK-INST: stnt1w { z23.s, z31.s }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x5f,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16f5fff + + +stnt1w {z0.s, z4.s, z8.s, z12.s}, pn8, [x0, x0, lsl #2] // 10100001-00100000-11000000-00001000 +// CHECK-INST: stnt1w { z0.s, z4.s, z8.s, z12.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x08,0xc0,0x20,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a120c008 + +stnt1w {z17.s, z21.s, z25.s, z29.s}, pn13, [x10, x21, lsl #2] // 10100001-00110101-11010101-01011001 +// CHECK-INST: stnt1w { z17.s, z21.s, z25.s, z29.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x59,0xd5,0x35,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a135d559 + +stnt1w {z19.s, z23.s, z27.s, z31.s}, pn11, [x13, x8, lsl #2] // 10100001-00101000-11001101-10111011 +// CHECK-INST: stnt1w { z19.s, z23.s, z27.s, z31.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xbb,0xcd,0x28,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a128cdbb + +stnt1w {z19.s, z23.s, z27.s, z31.s}, pn15, [sp, xzr, lsl #2] // 10100001-00111111-11011111-11111011 +// 
CHECK-INST: stnt1w { z19.s, z23.s, z27.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfb,0xdf,0x3f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a13fdffb + + +stnt1w {z0.s, z4.s, z8.s, z12.s}, pn8, [x0] // 10100001-01100000-11000000-00001000 +// CHECK-INST: stnt1w { z0.s, z4.s, z8.s, z12.s }, pn8, [x0] +// CHECK-ENCODING: [0x08,0xc0,0x60,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a160c008 + +stnt1w {z17.s, z21.s, z25.s, z29.s}, pn13, [x10, #20, mul vl] // 10100001-01100101-11010101-01011001 +// CHECK-INST: stnt1w { z17.s, z21.s, z25.s, z29.s }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x59,0xd5,0x65,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a165d559 + +stnt1w {z19.s, z23.s, z27.s, z31.s}, pn11, [x13, #-32, mul vl] // 10100001-01101000-11001101-10111011 +// CHECK-INST: stnt1w { z19.s, z23.s, z27.s, z31.s }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xbb,0xcd,0x68,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a168cdbb + +stnt1w {z19.s, z23.s, z27.s, z31.s}, pn15, [sp, #-4, mul vl] // 10100001-01101111-11011111-11111011 +// CHECK-INST: stnt1w { z19.s, z23.s, z27.s, z31.s }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfb,0xdf,0x6f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a16fdffb + diff --git a/llvm/test/MC/AArch64/SME2/str-diagnostics.s b/llvm/test/MC/AArch64/SME2/str-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str-diagnostics.s @@ -0,0 +1,6 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s| FileCheck %s + +str zt, [x0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be zt0 register +// CHECK-NEXT: str zt, [x0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/str.s b/llvm/test/MC/AArch64/SME2/str.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/str.s @@ -0,0 +1,38 @@ +// RUN: llvm-mc -triple=aarch64 
-show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +str zt0, [x0] // 11100001-00111111-10000000-00000000 +// CHECK-INST: str zt0, [x0] +// CHECK-ENCODING: [0x00,0x80,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8000 + +str zt0, [x10] // 11100001-00111111-10000001-01000000 +// CHECK-INST: str zt0, [x10] +// CHECK-ENCODING: [0x40,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f8140 + +str zt0, [x13] // 11100001-00111111-10000001-10100000 +// CHECK-INST: str zt0, [x13] +// CHECK-ENCODING: [0xa0,0x81,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f81a0 + +str zt0, [sp] // 11100001-00111111-10000011-11100000 +// CHECK-INST: str zt0, [sp] +// CHECK-ENCODING: [0xe0,0x83,0x3f,0xe1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: e13f83e0 + diff --git a/llvm/test/MC/AArch64/SME2/sub-diagnostics.s b/llvm/test/MC/AArch64/SME2/sub-diagnostics.s --- a/llvm/test/MC/AArch64/SME2/sub-diagnostics.s +++ b/llvm/test/MC/AArch64/SME2/sub-diagnostics.s @@ -66,7 +66,7 @@ // Invalid vector list. 
sub za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: sub za.d[w8, 0], {z0.d,z2.d}, {z0.d,z2.d} // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sudot-diagnostics.s b/llvm/test/MC/AArch64/SME2/sudot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sudot-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid select register + +sudot za.s[w7, 0, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sudot za.s[w7, 0, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sudot za.s[w12, 0, vgx2], {z0.b-z1.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sudot za.s[w12, 0, vgx2], {z0.b-z1.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid select offset + +sudot za.s[w8, 8], {z0.b-z1.b}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: sudot za.s[w8, 8], {z0.b-z1.b}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sudot za.s[w8, -1], {z0.b-z3.b}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: sudot za.s[w8, -1], {z0.b-z3.b}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range element index + +sudot za.s[w8, 0], {z0.b-z1.b}, z0.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: sudot za.s[w8, 0], {z0.b-z1.b}, z0.b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sudot za.s[w8, 0], {z0.b-z3.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: sudot za.s[w8, 0], {z0.b-z3.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// ZPR range constraint + +sudot za.s[w8, 5], {z0.b-z1.b}, z16.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sudot za.s[w8, 5], {z0.b-z1.b}, z16.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sudot za.s[w8, 5], {z0.b-z3.b}, z16.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sudot za.s[w8, 5], {z0.b-z3.b}, z16.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// sudot (multi-single) + +sudot za.s[w8, 5], {z0.b-z1.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sudot za.s[w8, 5], {z0.b-z1.b}, z16.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sudot za.s[w8, 5], {z0.b-z3.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sudot za.s[w8, 5], {z0.b-z3.b}, z16.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sudot.s b/llvm/test/MC/AArch64/SME2/sudot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sudot.s @@ -0,0 +1,593 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc 
-triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sudot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00011000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201418 + +sudot za.s[w8, 0], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00011000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201418 + +sudot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01011101 +// CHECK-INST: sudot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x5d,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125555d + +sudot za.s[w10, 5], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01011101 +// CHECK-INST: sudot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x5d,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125555d + +sudot za.s[w11, 7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xbf,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875bf + +sudot za.s[w11, 7], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xbf,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875bf + +sudot za.s[w11, 7, vgx2], 
{z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xff,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77ff + +sudot za.s[w11, 7], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xff,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77ff + +sudot za.s[w8, 5, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x3d,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120163d + +sudot za.s[w8, 5], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x3d,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120163d + +sudot za.s[w8, 1, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1439 + +sudot za.s[w8, 1], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1439 + +sudot za.s[w10, 0, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x78,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245678 + +sudot za.s[w10, 0], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: 
[0x78,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245678 + +sudot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10011000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221598 + +sudot za.s[w8, 0], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10011000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221598 + +sudot za.s[w10, 1, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5439 + +sudot za.s[w10, 1], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5439 + +sudot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11011101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xdd,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16dd + +sudot za.s[w8, 5], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11011101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xdd,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16dd + +sudot za.s[w11, 2, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x3a,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121753a + +sudot za.s[w11, 2], {z9.b, z10.b}, z1.b // 
11000001-00100001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x3a,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121753a + +sudot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10011111 +// CHECK-INST: sudot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x9f,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b359f + +sudot za.s[w9, 7], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10011111 +// CHECK-INST: sudot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x9f,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b359f + + +sudot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00111000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501038 + +sudot za.s[w8, 0], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00111000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501038 + +sudot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01111101 +// CHECK-INST: sudot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x7d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155557d + +sudot za.s[w10, 5], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01111101 +// CHECK-INST: sudot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x7d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155557d + +sudot za.s[w11, 7, vgx2], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: 
[0xbf,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587dbf + +sudot za.s[w11, 7], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xbf,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587dbf + +sudot za.s[w11, 7, vgx2], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xff,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fff + +sudot za.s[w11, 7], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xff,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fff + +sudot za.s[w8, 5, vgx2], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x3d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e3d + +sudot za.s[w8, 5], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x3d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e3d + +sudot za.s[w8, 1, vgx2], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1439 + +sudot za.s[w8, 1], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1439 + +sudot za.s[w10, 
0, vgx2], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x78,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545678 + +sudot za.s[w10, 0], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x78,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545678 + +sudot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10111000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219b8 + +sudot za.s[w8, 0], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10111000 +// CHECK-INST: sudot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219b8 + +sudot za.s[w10, 1, vgx2], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5839 + +sudot za.s[w10, 1], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5839 + +sudot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xfd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1afd + +sudot za.s[w8, 5], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11111101 +// CHECK-INST: sudot za.s[w8, 5, vgx2], 
{ z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xfd,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1afd + +sudot za.s[w11, 2, vgx2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151753a + +sudot za.s[w11, 2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151753a + +sudot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10111111 +// CHECK-INST: sudot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39bf + +sudot za.s[w9, 7], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10111111 +// CHECK-INST: sudot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39bf + + +sudot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00011000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301418 + +sudot za.s[w8, 0], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00011000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301418 + +sudot za.s[w10, 5, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01011101 +// CHECK-INST: sudot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x5d,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c135555d + +sudot za.s[w10, 5], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01011101 +// CHECK-INST: sudot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x5d,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135555d + +sudot za.s[w11, 7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xbf,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875bf + +sudot za.s[w11, 7], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xbf,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875bf + +sudot za.s[w11, 7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xff,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77ff + +sudot za.s[w11, 7], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xff,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77ff + +sudot za.s[w8, 5, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x3d,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c130163d + +sudot za.s[w8, 5], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x3d,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c130163d + +sudot za.s[w8, 1, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00111001 +// 
CHECK-INST: sudot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1439 + +sudot za.s[w8, 1], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1439 + +sudot za.s[w10, 0, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345678 + +sudot za.s[w10, 0], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01111000 +// CHECK-INST: sudot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345678 + +sudot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10011000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321598 + +sudot za.s[w8, 0], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10011000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321598 + +sudot za.s[w10, 1, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5439 + +sudot za.s[w10, 1], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c13a5439 + +sudot za.s[w8, 5, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11011101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xdd,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16dd + +sudot za.s[w8, 5], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11011101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xdd,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16dd + +sudot za.s[w11, 2, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x3a,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131753a + +sudot za.s[w11, 2], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x3a,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131753a + +sudot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10011111 +// CHECK-INST: sudot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x9f,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b359f + +sudot za.s[w9, 7], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10011111 +// CHECK-INST: sudot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x9f,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b359f + + +sudot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00111000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509038 + +sudot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00111000 +// CHECK-INST: 
sudot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509038 + +sudot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00111101 +// CHECK-INST: sudot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x3d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d53d + +sudot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00111101 +// CHECK-INST: sudot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x3d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d53d + +sudot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xbf,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdbf + +sudot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xbf,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdbf + +sudot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xbf,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffbf + +sudot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10111111 +// CHECK-INST: sudot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xbf,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffbf + +sudot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: 
[0x3d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e3d + +sudot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x3d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e3d + +sudot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9439 + +sudot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00111001 +// CHECK-INST: sudot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9439 + +sudot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00111000 +// CHECK-INST: sudot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x38,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d638 + +sudot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00111000 +// CHECK-INST: sudot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x38,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d638 + +sudot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10111000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299b8 + +sudot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10111000 +// CHECK-INST: sudot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299b8 + +sudot 
za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad839 + +sudot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00111001 +// CHECK-INST: sudot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad839 + +sudot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xbd,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9abd + +sudot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10111101 +// CHECK-INST: sudot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xbd,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9abd + +sudot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f53a + +sudot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00111010 +// CHECK-INST: sudot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f53a + +sudot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10111111 +// CHECK-INST: sudot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9bf + +sudot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10111111 +// CHECK-INST: 
sudot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9bf + diff --git a/llvm/test/MC/AArch64/SME2/sumlall-diagnostics.s b/llvm/test/MC/AArch64/SME2/sumlall-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sumlall-diagnostics.s @@ -0,0 +1,74 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sumlall za.s[w11, 4:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sumlall za.s[w11, 4:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sumlall za.s[w11, 4:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sumlall za.s[w11, 4:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +sumlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sumlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sumlall za.s[w10, 4:7], z10.b, z30.b[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: sumlall za.s[w10, 4:7], z10.b, z30.b[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +sumlall za.s[w7, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sumlall za.s[w7, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: + +sumlall za.s[w12, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: sumlall za.s[w12, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +sumlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sumlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sumlall za.s[w8, 5:8, vgx2], {z22.b-z23.b}, z14.b[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. +// CHECK-NEXT: sumlall za.s[w8, 5:8, vgx2], {z22.b-z23.b}, z14.b[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sumlall za.h[w8, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: sumlall za.h[w8, 6:7, vgx2], {z12.b-z13.b}, z8.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +sumlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: sumlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sumlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: sumlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sumlall.s b/llvm/test/MC/AArch64/SME2/sumlall.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sumlall.s @@ -0,0 +1,1029 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +usmlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x04,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200404 + +usmlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x45,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254545 + +usmlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xa7,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865a7 + +usmlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: 
[0xe7,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67e7 + +usmlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x25,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200625 + +usmlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x25,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0425 + +usmlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x64,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244664 + +usmlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x84,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220584 + +usmlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x25,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4425 + +usmlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc5,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c5 + +usmlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00100110 +// CHECK-INST: usmlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x26,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216526 + +usmlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10000111 +// CHECK-INST: usmlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x87,0x25,0x2b,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c12b2587 + + +usmlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x04,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000004 + +usmlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x45,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055545 + +usmlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xa7,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108eda7 + +usmlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xe7,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10fffe7 + +usmlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x25,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e25 + +usmlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x25,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8425 + +usmlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x64,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045664 + +usmlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x84,0x19,0x02,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021984 + +usmlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x25,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac825 + +usmlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc5,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac5 + +usmlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00100110 +// CHECK-INST: usmlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x26,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f526 + +usmlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10000111 +// CHECK-INST: usmlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x87,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba987 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200004 + +usmlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200004 + +usmlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254145 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01000101 +// CHECK-INST:
usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254145 + +usmlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a5 + +usmlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a5 + +usmlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e5 + +usmlall za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e5 + +usmlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200225 + +usmlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200225 + +usmlall za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +//
CHECK-ENCODING: [0x25,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0025 + +usmlall za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0025 + +usmlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244264 + +usmlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244264 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220184 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220184 + +usmlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4025 + +usmlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c12a4025 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c5 + +usmlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c5 + +usmlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216124 + +usmlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216124 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2185 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2185 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-00010000-00000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100020 + +usmlall za.s[w8, 0:3], {z0.b -
z1.b}, z0.b[0] // 11000001-00010000-00000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100020 + +usmlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001-00010101-01000101-01100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x65,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154565 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x65,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154565 + +usmlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001-00011000-01101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186da7 + +usmlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186da7 + +usmlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001-00011111-01101111-11100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xe7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fe7 + +usmlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xe7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fe7 + +usmlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14]
// 11000001-00010000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e25 + +usmlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e25 + +usmlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001-00011110-00000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0421 + +usmlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0421 + +usmlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001-00010100-01000110-01100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x60,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144660 + +usmlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x60,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144660 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001-00010010-00001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11209a0 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] //
11000001-00010010-00001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11209a0 + +usmlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001-00011010-01001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4821 + +usmlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4821 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001-00011110-00001010-11100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xe5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ae5 + +usmlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xe5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ae5 + +usmlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001-00010001-01100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116522 + +usmlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116522 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001-00011011-00101001-10100111
+// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b29a7 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b29a7 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00004 + +usmlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00004 + +usmlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44145 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44145 + +usmlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x85,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86185 + +usmlall za.s[w11, 4:7], {z12.b - z13.b},
{z8.b - z9.b} // 11000001-10101000-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x85,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86185 + +usmlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01100011-11000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c5 + +usmlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c5 + +usmlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00205 + +usmlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00205 + +usmlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0005 + +usmlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1be0005 + +usmlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01000010-01000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x44,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44244 + +usmlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x44,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44244 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20184 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20184 + +usmlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x05,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4005 + +usmlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x05,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4005 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +//
CHECK-ENCODING: [0xc5,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c5 + +usmlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c5 + +usmlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06104 + +usmlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06104 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x85,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2185 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x85,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2185 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300004 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b },
z0.b +// CHECK-ENCODING: [0x04,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300004 + +usmlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354145 + +usmlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354145 + +usmlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a5 + +usmlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a5 + +usmlall za.s[w11, 4:7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e5 + +usmlall za.s[w11, 4:7], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e5 + +usmlall za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x30,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300225 + +usmlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300225 + +usmlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0025 + +usmlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0025 + +usmlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344264 + +usmlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344264 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320184 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320184 + +usmlall za.s[w10, 
4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4025 + +usmlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4025 + +usmlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c5 + +usmlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c5 + +usmlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316124 + +usmlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316124 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2185 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000101 +// 
CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2185 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108020 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108020 + +usmlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x25,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c525 + +usmlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x25,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c525 + +usmlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118eda7 + +usmlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118eda7 + +usmlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, 
vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0xa7,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fefa7 + +usmlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0xa7,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fefa7 + +usmlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e25 + +usmlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e25 + +usmlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8421 + +usmlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8421 + +usmlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x20,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c620 + +usmlall za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// 
CHECK-ENCODING: [0x20,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c620 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11289a0 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11289a0 + +usmlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac821 + +usmlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac821 + +usmlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0xa5,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8aa5 + +usmlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0xa5,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8aa5 + +usmlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: 
[0x22,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e522 + +usmlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e522 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba9a7 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba9a7 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10004 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10004 + +usmlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x05,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54105 + +usmlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// 
CHECK-ENCODING: [0x05,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54105 + +usmlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96185 + +usmlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96185 + +usmlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6385 + +usmlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6385 + +usmlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x05,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10205 + +usmlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x05,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10205 + +usmlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000101 
+// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0005 + +usmlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0005 + +usmlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x04,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54204 + +usmlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x04,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54204 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10184 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10184 + +usmlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x05,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94005 + +usmlall za.s[w10, 4:7], {z0.b 
- z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x05,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94005 + +usmlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0285 + +usmlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0285 + +usmlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16104 + +usmlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16104 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92185 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c1a92185 + diff --git a/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +sunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: sunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: sunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: sunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/sunpk.s b/llvm/test/MC/AArch64/SME2/sunpk.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/sunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s 
--check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +sunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000000 +// CHECK-INST: sunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x00,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e000 + +sunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010100 +// CHECK-INST: sunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x54,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e154 + +sunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110110 +// CHECK-INST: sunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb6,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b6 + +sunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111110 +// CHECK-INST: sunpk { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xfe,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3fe + + +sunpk {z0.s - z1.s}, z0.h // 11000001-10100101-11100000-00000000 +// CHECK-INST: sunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x00,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e000 + +sunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010100 +// CHECK-INST: sunpk { z20.s, z21.s }, z10.h +// CHECK-ENCODING: [0x54,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e154 + +sunpk {z22.s - z23.s}, z13.h // 
11000001-10100101-11100001-10110110 +// CHECK-INST: sunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb6,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b6 + +sunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111110 +// CHECK-INST: sunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xfe,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3fe + + +sunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000000 +// CHECK-INST: sunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x00,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e000 + +sunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010100 +// CHECK-INST: sunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x54,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e154 + +sunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110110 +// CHECK-INST: sunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb6,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b6 + +sunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111110 +// CHECK-INST: sunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xfe,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3fe + + +sunpk {z0.h - z3.h}, {z0.b - z1.b} // 11000001-01110101-11100000-00000000 +// CHECK-INST: sunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x00,0xe0,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e000 + +sunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x54,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e154 + +sunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010100 +// CHECK-INST: sunpk { z20.h - z23.h }, { z12.b, z13.b } +// 
CHECK-ENCODING: [0x94,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e194 + +sunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011100 +// CHECK-INST: sunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdc,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dc + + +sunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000000 +// CHECK-INST: sunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x00,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e000 + +sunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x54,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e154 + +sunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010100 +// CHECK-INST: sunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x94,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e194 + +sunpk {z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011100 +// CHECK-INST: sunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdc,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dc + + +sunpk {z0.d - z3.d}, {z0.s - z1.s} // 11000001-11110101-11100000-00000000 +// CHECK-INST: sunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x00,0xe0,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e000 + +sunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x54,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e154 + +sunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010100 +// CHECK-INST: sunpk { z20.d - z23.d }, { z12.s, 
z13.s } +// CHECK-ENCODING: [0x94,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e194 + +sunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011100 +// CHECK-INST: sunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdc,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dc + diff --git a/llvm/test/MC/AArch64/SME2/suvdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/suvdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/suvdot-diagnostics.s @@ -0,0 +1,69 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Out of range index offset + +suvdot za.s[w8, 8, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: suvdot za.s[w8, 8, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +suvdot za.s[w8, -1, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. 
+// CHECK-NEXT: suvdot za.s[w8, -1, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +suvdot za.s[w7, 0, vgx4], {z4.b-z7.b}, z0.b[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: suvdot za.s[w7, 0, vgx4], {z4.b-z7.b}, z0.b[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +suvdot za.s[w12, 0, vgx4], {z8.b-z11.b}, z5.b[5] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: suvdot za.s[w12, 0, vgx4], {z8.b-z11.b}, z5.b[5] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector list + +suvdot za.s[w8, 0, vgx4], {z0.b-z4.b}, z0.b[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: suvdot za.s[w8, 0, vgx4], {z0.b-z4.b}, z0.b[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +suvdot za.s[w8, 0, vgx4], {z1.b-z4.b}, z15.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element type +// CHECK-NEXT: suvdot za.s[w8, 0, vgx4], {z1.b-z4.b}, z15.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Matrix Operand + +suvdot za.h[w8, 0, vgx4], {z0.b-z3.b}, z4.b[7] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: suvdot za.h[w8, 0, vgx4], {z0.b-z3.b}, z4.b[7] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector grouping + +suvdot za.s[w8, 0, vgx2], {z0.b-z3.b}, z14.b[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: suvdot za.s[w8, 0, vgx2], 
{z0.b-z3.b}, z14.b[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid lane index + +suvdot za.s[w8, 0, vgx4], {z0.b-z3.b}, z14.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: suvdot za.s[w8, 0, vgx4], {z0.b-z3.b}, z14.b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +suvdot za.s[w8, 0, vgx4], {z0.b-z3.b}, z14.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: suvdot za.s[w8, 0, vgx4], {z0.b-z3.b}, z14.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/suvdot.s b/llvm/test/MC/AArch64/SME2/suvdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/suvdot.s @@ -0,0 +1,158 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +suvdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10000000-00111000 +// CHECK-INST: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508038 + +suvdot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 
11000001-01010000-10000000-00111000 +// CHECK-INST: suvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x38,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508038 + +suvdot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00111101 +// CHECK-INST: suvdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x3d,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c53d + +suvdot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00111101 +// CHECK-INST: suvdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x3d,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c53d + +suvdot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10111111 +// CHECK-INST: suvdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xbf,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158edbf + +suvdot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10111111 +// CHECK-INST: suvdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xbf,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158edbf + +suvdot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10111111 +// CHECK-INST: suvdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xbf,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefbf + +suvdot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10111111 +// CHECK-INST: suvdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xbf,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefbf + +suvdot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00111101 +// CHECK-INST: suvdot za.s[w8, 5, 
vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x3d,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e3d + +suvdot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00111101 +// CHECK-INST: suvdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x3d,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e3d + +suvdot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00111001 +// CHECK-INST: suvdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8439 + +suvdot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00111001 +// CHECK-INST: suvdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x39,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8439 + +suvdot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00111000 +// CHECK-INST: suvdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x38,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c638 + +suvdot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00111000 +// CHECK-INST: suvdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x38,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c638 + +suvdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10111000 +// CHECK-INST: suvdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289b8 + +suvdot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10111000 +// CHECK-INST: suvdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb8,0x89,0x52,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289b8 + +suvdot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00111001 +// CHECK-INST: suvdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac839 + +suvdot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00111001 +// CHECK-INST: suvdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x39,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac839 + +suvdot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10111101 +// CHECK-INST: suvdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xbd,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8abd + +suvdot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10111101 +// CHECK-INST: suvdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xbd,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8abd + +suvdot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00111010 +// CHECK-INST: suvdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e53a + +suvdot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00111010 +// CHECK-INST: suvdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x3a,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e53a + +suvdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10111111 +// CHECK-INST: suvdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9bf + +suvdot 
za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10111111 +// CHECK-INST: suvdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xbf,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9bf + diff --git a/llvm/test/MC/AArch64/SME2/svdot.s b/llvm/test/MC/AArch64/SME2/svdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/svdot.s @@ -0,0 +1,448 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +svdot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00100000 +// CHECK-INST: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x20,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500020 + +svdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00100000 +// CHECK-INST: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x20,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500020 + +svdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01100101 +// CHECK-INST: svdot za.s[w10, 5, vgx2], { z10.h, 
z11.h }, z5.h[1] +// CHECK-ENCODING: [0x65,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554565 + +svdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01100101 +// CHECK-INST: svdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x65,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554565 + +svdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0xa7,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586da7 + +svdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0xa7,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586da7 + +svdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11100111 +// CHECK-INST: svdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xe7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fe7 + +svdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11100111 +// CHECK-INST: svdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xe7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6fe7 + +svdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00100101 +// CHECK-INST: svdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x25,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e25 + +svdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00100101 +// CHECK-INST: svdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x25,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1500e25 + +svdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00100001 +// CHECK-INST: svdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x21,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0421 + +svdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00100001 +// CHECK-INST: svdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x21,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0421 + +svdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01100000 +// CHECK-INST: svdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x60,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544660 + +svdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01100000 +// CHECK-INST: svdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x60,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544660 + +svdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10100000 +// CHECK-INST: svdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0xa0,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15209a0 + +svdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10100000 +// CHECK-INST: svdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0xa0,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15209a0 + +svdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00100001 +// CHECK-INST: svdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x21,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4821 + +svdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00100001 
+// CHECK-INST: svdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x21,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4821 + +svdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11100101 +// CHECK-INST: svdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xe5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ae5 + +svdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11100101 +// CHECK-INST: svdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xe5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0ae5 + +svdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00100010 +// CHECK-INST: svdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x22,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516522 + +svdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00100010 +// CHECK-INST: svdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x22,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516522 + +svdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10100111 +// CHECK-INST: svdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0xa7,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b29a7 + +svdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10100111 +// CHECK-INST: svdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0xa7,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b29a7 + + +svdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10000000-00100000 +// CHECK-INST: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x50,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508020 + +svdot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10000000-00100000 +// CHECK-INST: svdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508020 + +svdot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00100101 +// CHECK-INST: svdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x25,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c525 + +svdot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00100101 +// CHECK-INST: svdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x25,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c525 + +svdot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158eda7 + +svdot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xa7,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158eda7 + +svdot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xa7,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefa7 + +svdot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10100111 +// CHECK-INST: svdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xa7,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefa7 + +svdot za.s[w8, 5, 
vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00100101 +// CHECK-INST: svdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e25 + +svdot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00100101 +// CHECK-INST: svdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x25,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e25 + +svdot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00100001 +// CHECK-INST: svdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8421 + +svdot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00100001 +// CHECK-INST: svdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x21,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8421 + +svdot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00100000 +// CHECK-INST: svdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x20,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c620 + +svdot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00100000 +// CHECK-INST: svdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x20,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c620 + +svdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10100000 +// CHECK-INST: svdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa0,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289a0 + +svdot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10100000 +// CHECK-INST: svdot za.s[w8, 0, 
vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa0,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289a0 + +svdot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00100001 +// CHECK-INST: svdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac821 + +svdot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00100001 +// CHECK-INST: svdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x21,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac821 + +svdot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10100101 +// CHECK-INST: svdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xa5,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8aa5 + +svdot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10100101 +// CHECK-INST: svdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xa5,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8aa5 + +svdot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00100010 +// CHECK-INST: svdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e522 + +svdot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00100010 +// CHECK-INST: svdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x22,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e522 + +svdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10100111 +// CHECK-INST: svdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0xa9,0x5b,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9a7 + +svdot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10100111 +// CHECK-INST: svdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xa7,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9a7 + + +svdot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10001000-00001000 +// CHECK-INST: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x88,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08808 + +svdot za.d[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10001000-00001000 +// CHECK-INST: svdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x08,0x88,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08808 + +svdot za.d[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11001101-00001101 +// CHECK-INST: svdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xcd,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5cd0d + +svdot za.d[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11001101-00001101 +// CHECK-INST: svdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x0d,0xcd,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5cd0d + +svdot za.d[w11, 7, vgx4], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11101101-10001111 +// CHECK-INST: svdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x8f,0xed,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8ed8f + +svdot za.d[w11, 7], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11101101-10001111 +// CHECK-INST: svdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x8f,0xed,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8ed8f + +svdot za.d[w11, 7, vgx4], {z28.h - z31.h}, 
z15.h[1] // 11000001-11011111-11101111-10001111 +// CHECK-INST: svdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x8f,0xef,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfef8f + +svdot za.d[w11, 7], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11101111-10001111 +// CHECK-INST: svdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x8f,0xef,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfef8f + +svdot za.d[w8, 5, vgx4], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10001110-00001101 +// CHECK-INST: svdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x8e,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08e0d + +svdot za.d[w8, 5], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10001110-00001101 +// CHECK-INST: svdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x0d,0x8e,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08e0d + +svdot za.d[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10001100-00001001 +// CHECK-INST: svdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x8c,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8c09 + +svdot za.d[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10001100-00001001 +// CHECK-INST: svdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x09,0x8c,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8c09 + +svdot za.d[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11001110-00001000 +// CHECK-INST: svdot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xce,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4ce08 + +svdot za.d[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11001110-00001000 +// CHECK-INST: svdot za.d[w10, 0, vgx4], { z16.h - 
z19.h }, z4.h[1] +// CHECK-ENCODING: [0x08,0xce,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4ce08 + +svdot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10001001-10001000 +// CHECK-INST: svdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x89,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28988 + +svdot za.d[w8, 0], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10001001-10001000 +// CHECK-INST: svdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x88,0x89,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28988 + +svdot za.d[w10, 1, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11001000-00001001 +// CHECK-INST: svdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc8,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac809 + +svdot za.d[w10, 1], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11001000-00001001 +// CHECK-INST: svdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x09,0xc8,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac809 + +svdot za.d[w8, 5, vgx4], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10001010-10001101 +// CHECK-INST: svdot za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x8d,0x8a,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8a8d + +svdot za.d[w8, 5], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10001010-10001101 +// CHECK-INST: svdot za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x8d,0x8a,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8a8d + +svdot za.d[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11101101-00001010 +// CHECK-INST: svdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xed,0xd1,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1d1ed0a + +svdot za.d[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11101101-00001010 +// CHECK-INST: svdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x0a,0xed,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1ed0a + +svdot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10101001-10001111 +// CHECK-INST: svdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x8f,0xa9,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba98f + +svdot za.d[w9, 7], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10101001-10001111 +// CHECK-INST: svdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x8f,0xa9,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba98f + diff --git a/llvm/test/MC/AArch64/SME2/uclamp-diagnostics.s b/llvm/test/MC/AArch64/SME2/uclamp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uclamp-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uclamp {z0.b-z2.b}, z0.b, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uclamp {z0.b-z2.b}, z0.b, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uclamp {z1.s-z2.s}, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: uclamp {z1.s-z2.s}, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uclamp {z0.h-z1.h}, z0.h, z4.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uclamp 
{z0.h-z1.h}, z0.h, z4.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uclamp.s b/llvm/test/MC/AArch64/SME2/uclamp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uclamp.s @@ -0,0 +1,213 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uclamp {z0.h, z1.h}, z0.h, z0.h // 11000001-01100000-11000100-00000001 +// CHECK-INST: uclamp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x01,0xc4,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160c401 + +uclamp {z20.h, z21.h}, z10.h, z21.h // 11000001-01110101-11000101-01010101 +// CHECK-INST: uclamp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x55,0xc5,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175c555 + +uclamp {z22.h, z23.h}, z13.h, z8.h // 11000001-01101000-11000101-10110111 +// CHECK-INST: uclamp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xc5,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168c5b7 + +uclamp {z30.h, z31.h}, z31.h, z31.h // 11000001-01111111-11000111-11111111 +// CHECK-INST: uclamp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xff,0xc7,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c17fc7ff + + +uclamp {z0.s, z1.s}, z0.s, z0.s // 11000001-10100000-11000100-00000001 +// CHECK-INST: uclamp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x01,0xc4,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0c401 + +uclamp {z20.s, z21.s}, z10.s, z21.s // 11000001-10110101-11000101-01010101 +// CHECK-INST: uclamp { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x55,0xc5,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5c555 + +uclamp {z22.s, z23.s}, z13.s, z8.s // 11000001-10101000-11000101-10110111 +// CHECK-INST: uclamp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb7,0xc5,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8c5b7 + +uclamp {z30.s, z31.s}, z31.s, z31.s // 11000001-10111111-11000111-11111111 +// CHECK-INST: uclamp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xff,0xc7,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfc7ff + + +uclamp {z0.d, z1.d}, z0.d, z0.d // 11000001-11100000-11000100-00000001 +// CHECK-INST: uclamp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x01,0xc4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0c401 + +uclamp {z20.d, z21.d}, z10.d, z21.d // 11000001-11110101-11000101-01010101 +// CHECK-INST: uclamp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x55,0xc5,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5c555 + +uclamp {z22.d, z23.d}, z13.d, z8.d // 11000001-11101000-11000101-10110111 +// CHECK-INST: uclamp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb7,0xc5,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8c5b7 + +uclamp {z30.d, z31.d}, z31.d, z31.d // 11000001-11111111-11000111-11111111 +// CHECK-INST: uclamp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xff,0xc7,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffc7ff + + +uclamp {z0.b, 
z1.b}, z0.b, z0.b // 11000001-00100000-11000100-00000001 +// CHECK-INST: uclamp { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x01,0xc4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120c401 + +uclamp {z20.b, z21.b}, z10.b, z21.b // 11000001-00110101-11000101-01010101 +// CHECK-INST: uclamp { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xc5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135c555 + +uclamp {z22.b, z23.b}, z13.b, z8.b // 11000001-00101000-11000101-10110111 +// CHECK-INST: uclamp { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xc5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128c5b7 + +uclamp {z30.b, z31.b}, z31.b, z31.b // 11000001-00111111-11000111-11111111 +// CHECK-INST: uclamp { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xc7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fc7ff + + +uclamp {z0.h - z3.h}, z0.h, z0.h // 11000001-01100000-11001100-00000001 +// CHECK-INST: uclamp { z0.h - z3.h }, z0.h, z0.h +// CHECK-ENCODING: [0x01,0xcc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160cc01 + +uclamp {z20.h - z23.h}, z10.h, z21.h // 11000001-01110101-11001101-01010101 +// CHECK-INST: uclamp { z20.h - z23.h }, z10.h, z21.h +// CHECK-ENCODING: [0x55,0xcd,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175cd55 + +uclamp {z20.h - z23.h}, z13.h, z8.h // 11000001-01101000-11001101-10110101 +// CHECK-INST: uclamp { z20.h - z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb5,0xcd,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168cdb5 + +uclamp {z28.h - z31.h}, z31.h, z31.h // 11000001-01111111-11001111-11111101 +// CHECK-INST: uclamp { z28.h - z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfd,0xcf,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fcffd + + +uclamp {z0.s - z3.s}, z0.s, z0.s // 
11000001-10100000-11001100-00000001 +// CHECK-INST: uclamp { z0.s - z3.s }, z0.s, z0.s +// CHECK-ENCODING: [0x01,0xcc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0cc01 + +uclamp {z20.s - z23.s}, z10.s, z21.s // 11000001-10110101-11001101-01010101 +// CHECK-INST: uclamp { z20.s - z23.s }, z10.s, z21.s +// CHECK-ENCODING: [0x55,0xcd,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5cd55 + +uclamp {z20.s - z23.s}, z13.s, z8.s // 11000001-10101000-11001101-10110101 +// CHECK-INST: uclamp { z20.s - z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb5,0xcd,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8cdb5 + +uclamp {z28.s - z31.s}, z31.s, z31.s // 11000001-10111111-11001111-11111101 +// CHECK-INST: uclamp { z28.s - z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xfd,0xcf,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfcffd + + +uclamp {z0.d - z3.d}, z0.d, z0.d // 11000001-11100000-11001100-00000001 +// CHECK-INST: uclamp { z0.d - z3.d }, z0.d, z0.d +// CHECK-ENCODING: [0x01,0xcc,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0cc01 + +uclamp {z20.d - z23.d}, z10.d, z21.d // 11000001-11110101-11001101-01010101 +// CHECK-INST: uclamp { z20.d - z23.d }, z10.d, z21.d +// CHECK-ENCODING: [0x55,0xcd,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5cd55 + +uclamp {z20.d - z23.d}, z13.d, z8.d // 11000001-11101000-11001101-10110101 +// CHECK-INST: uclamp { z20.d - z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb5,0xcd,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8cdb5 + +uclamp {z28.d - z31.d}, z31.d, z31.d // 11000001-11111111-11001111-11111101 +// CHECK-INST: uclamp { z28.d - z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfd,0xcf,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffcffd + + +uclamp {z0.b - z3.b}, z0.b, z0.b // 
11000001-00100000-11001100-00000001 +// CHECK-INST: uclamp { z0.b - z3.b }, z0.b, z0.b +// CHECK-ENCODING: [0x01,0xcc,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120cc01 + +uclamp {z20.b - z23.b}, z10.b, z21.b // 11000001-00110101-11001101-01010101 +// CHECK-INST: uclamp { z20.b - z23.b }, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xcd,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135cd55 + +uclamp {z20.b - z23.b}, z13.b, z8.b // 11000001-00101000-11001101-10110101 +// CHECK-INST: uclamp { z20.b - z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb5,0xcd,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128cdb5 + +uclamp {z28.b - z31.b}, z31.b, z31.b // 11000001-00111111-11001111-11111101 +// CHECK-INST: uclamp { z28.b - z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfd,0xcf,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fcffd + diff --git a/llvm/test/MC/AArch64/SME2/ucvtf-diagnostics.s b/llvm/test/MC/AArch64/SME2/ucvtf-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ucvtf-diagnostics.s @@ -0,0 +1,27 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ucvtf {z0.s-z3.s}, {z0.s-z4.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: ucvtf {z0.s-z3.s}, {z0.s-z4.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ucvtf {z1.s-z2.s}, {z0.s-z1.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ucvtf {z1.s-z2.s}, {z0.s-z1.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ucvtf {z0.s-z3.s}, {z1.s-z5.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: ucvtf {z0.s-z3.s}, {z1.s-z5.s} +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +ucvtf {z0.s-z3.s}, {z1.h-z3.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ucvtf {z0.s-z3.s}, {z1.h-z3.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/ucvtf.s b/llvm/test/MC/AArch64/SME2/ucvtf.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/ucvtf.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +ucvtf {z0.s, z1.s}, {z0.s, z1.s} // 11000001-00100010-11100000-00100000 +// CHECK-INST: ucvtf { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x20,0xe0,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e020 + +ucvtf {z20.s, z21.s}, {z10.s, z11.s} // 11000001-00100010-11100001-01110100 +// CHECK-INST: ucvtf { z20.s, z21.s }, { z10.s, z11.s } +// CHECK-ENCODING: [0x74,0xe1,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e174 + +ucvtf {z22.s, z23.s}, {z12.s, z13.s} // 11000001-00100010-11100001-10110110 +// CHECK-INST: ucvtf { z22.s, z23.s }, { z12.s, z13.s } +// CHECK-ENCODING: [0xb6,0xe1,0x22,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e1b6 + +ucvtf {z30.s, z31.s}, {z30.s, z31.s} // 11000001-00100010-11100011-11111110 +// CHECK-INST: ucvtf { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0xfe,0xe3,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c122e3fe + + +ucvtf {z0.s - z3.s}, {z0.s - z3.s} // 11000001-00110010-11100000-00100000 +// CHECK-INST: ucvtf { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x20,0xe0,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e020 + +ucvtf {z20.s - z23.s}, {z8.s - z11.s} // 11000001-00110010-11100001-00110100 +// CHECK-INST: ucvtf { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x34,0xe1,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e134 + +ucvtf {z20.s - z23.s}, {z12.s - z15.s} // 11000001-00110010-11100001-10110100 +// CHECK-INST: ucvtf { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0xb4,0xe1,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e1b4 + +ucvtf {z28.s - z31.s}, {z28.s - z31.s} // 11000001-00110010-11100011-10111100 +// CHECK-INST: ucvtf { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0xbc,0xe3,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c132e3bc + diff --git a/llvm/test/MC/AArch64/SME2/udot-diagnostics.s b/llvm/test/MC/AArch64/SME2/udot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/udot-diagnostics.s @@ -0,0 +1,65 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid select register + +udot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: udot za.s[w7, 0, vgx4], {z0.h-z3.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +udot za.s[w12, 0, 
vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: udot za.s[w12, 0, vgx2], {z0.h-z1.h}, z0.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid select offset + +udot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: udot za.s[w8, 8], {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +udot za.s[w8, -1], {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: udot za.s[w8, -1], {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range element index + +udot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: udot za.s[w8, 0], {z0.h-z1.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +udot za.s[w8, 0], {z0.h-z3.h}, z0.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. 
+// CHECK-NEXT: udot za.s[w8, 0], {z0.h-z3.h}, z0.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// ZPR range constraint + +udot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: udot za.s[w8, 5], {z0.h-z1.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +udot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: udot za.s[w8, 5], {z0.h-z3.h}, z16.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// udot (multi-single) + +udot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: udot za.s[w8, 5], {z0.h-z1.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +udot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: udot za.s[w8, 5], {z0.h-z3.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/udot.s b/llvm/test/MC/AArch64/SME2/udot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/udot.s @@ -0,0 +1,2626 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 
< %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +udot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601418 + +udot za.s[w8, 0], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601418 + +udot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x5d,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165555d + +udot za.s[w10, 5], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x5d,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165555d + +udot za.s[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10111111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xbf,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875bf + +udot za.s[w11, 7], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10111111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xbf,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875bf + +udot za.s[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11111111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xff,0x77,0x6f,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16f77ff + +udot za.s[w11, 7], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11111111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xff,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77ff + +udot za.s[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00111101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x3d,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160163d + +udot za.s[w8, 5], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00111101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x3d,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160163d + +udot za.s[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00111001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1439 + +udot za.s[w8, 1], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00111001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1439 + +udot za.s[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01111000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645678 + +udot za.s[w10, 0], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01111000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645678 + +udot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 
0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621598 + +udot za.s[w8, 0], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621598 + +udot za.s[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00111001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5439 + +udot za.s[w10, 1], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00111001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5439 + +udot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xdd,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16dd + +udot za.s[w8, 5], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xdd,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16dd + +udot za.s[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00111010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x3a,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161753a + +udot za.s[w11, 2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00111010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x3a,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161753a + +udot za.s[w9, 7, 
vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x9f,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b359f + +udot za.s[w9, 7], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x9f,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b359f + + +udot za.s[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501010 + +udot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00010000-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501010 + +udot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x55,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555555 + +udot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x55,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555555 + +udot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0x97,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d97 + +udot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01111101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// 
CHECK-ENCODING: [0x97,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587d97 + +udot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xd7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fd7 + +udot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01111111-11010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xd7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fd7 + +udot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x15,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e15 + +udot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00011110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x15,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e15 + +udot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x11,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1411 + +udot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x11,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1411 + +udot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01010000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x50,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545650 + +udot 
za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01010110-01010000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x50,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545650 + +udot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x90,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521990 + +udot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00011001-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0x90,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1521990 + +udot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x11,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5811 + +udot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01011000-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x11,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5811 + +udot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ad5 + +udot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00011010-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1ad5 + +udot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { 
z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x12,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517512 + +udot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x12,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517512 + +udot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x97,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b3997 + +udot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00111001-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0x97,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b3997 + + +udot za.s[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01418 + +udot za.s[w8, 0], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01418 + +udot za.s[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x5d,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4555d + +udot za.s[w10, 5], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x5d,0x55,0xf4,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4555d + +udot za.s[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x9f,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8759f + +udot za.s[w11, 7], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x9f,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8759f + +udot za.s[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11011111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77df + +udot za.s[w11, 7], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11011111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdf,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77df + +udot za.s[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x1d,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f0161d + +udot za.s[w8, 5], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x1d,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f0161d + +udot za.s[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00011001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x14,0xfe,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1419 + +udot za.s[w8, 1], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00011001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1419 + +udot za.s[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01011000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45658 + +udot za.s[w10, 0], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01011000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45658 + +udot za.s[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21598 + +udot za.s[w8, 0], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21598 + +udot za.s[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00011001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5419 + +udot za.s[w10, 1], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00011001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1fa5419 + +udot za.s[w8, 5, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdd,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16dd + +udot za.s[w8, 5], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdd,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16dd + +udot za.s[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00011010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x1a,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0751a + +udot za.s[w11, 2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00011010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x1a,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0751a + +udot za.s[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x9f,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea359f + +udot za.s[w9, 7], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x9f,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea359f + + +udot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00110000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501030 + +udot za.s[w8, 0], 
{z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00110000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501030 + +udot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01110101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x75,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555575 + +udot za.s[w10, 5], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01110101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x75,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1555575 + +udot za.s[w11, 7, vgx2], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587db7 + +udot za.s[w11, 7], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587db7 + +udot za.s[w11, 7, vgx2], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xf7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7ff7 + +udot za.s[w11, 7], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xf7,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7ff7 + +udot za.s[w8, 5, vgx2], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.b, 
z17.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e35 + +udot za.s[w8, 5], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e35 + +udot za.s[w8, 1, vgx2], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1431 + +udot za.s[w8, 1], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1431 + +udot za.s[w10, 0, vgx2], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x70,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545670 + +udot za.s[w10, 0], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x70,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545670 + +udot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10110000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219b0 + +udot za.s[w8, 0], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10110000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219b0 + 
+udot za.s[w10, 1, vgx2], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5831 + +udot za.s[w10, 1], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5831 + +udot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xf5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1af5 + +udot za.s[w8, 5], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xf5,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1af5 + +udot za.s[w11, 2, vgx2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517532 + +udot za.s[w11, 2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1517532 + +udot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10110111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39b7 + +udot za.s[w9, 7], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10110111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { 
z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39b7 + + +udot za.d[w8, 0, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-11010000-00000000-00011000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00018 + +udot za.d[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-11010000-00000000-00011000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d00018 + +udot za.d[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-11010101-01000101-01011101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5455d + +udot za.d[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-11010101-01000101-01011101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x5d,0x45,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5455d + +udot za.d[w11, 7, vgx2], {z12.h, z13.h}, z8.h[1] // 11000001-11011000-01100101-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z12.h, z13.h }, z8.h[1] +// CHECK-ENCODING: [0x9f,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8659f + +udot za.d[w11, 7], {z12.h, z13.h}, z8.h[1] // 11000001-11011000-01100101-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z12.h, z13.h }, z8.h[1] +// CHECK-ENCODING: [0x9f,0x65,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8659f + +udot za.d[w11, 7, vgx2], {z30.h, z31.h}, z15.h[1] // 11000001-11011111-01100111-11011111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z30.h, z31.h }, z15.h[1] +// CHECK-ENCODING: [0xdf,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1df67df + +udot za.d[w11, 7], {z30.h, z31.h}, z15.h[1] // 11000001-11011111-01100111-11011111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z30.h, z31.h }, z15.h[1] +// CHECK-ENCODING: [0xdf,0x67,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1df67df + +udot za.d[w8, 5, vgx2], {z16.h, z17.h}, z0.h[1] // 11000001-11010000-00000110-00011101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z16.h, z17.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0061d + +udot za.d[w8, 5], {z16.h, z17.h}, z0.h[1] // 11000001-11010000-00000110-00011101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z16.h, z17.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x06,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0061d + +udot za.d[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-11011110-00000100-00011001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0419 + +udot za.d[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-11011110-00000100-00011001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x04,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de0419 + +udot za.d[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-11010100-01000110-01011000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44658 + +udot za.d[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-11010100-01000110-01011000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x58,0x46,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d44658 + +udot za.d[w8, 0, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001-11010010-00000001-10011000 +// CHECK-INST: 
udot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20198 + +udot za.d[w8, 0], {z12.h, z13.h}, z2.h[0] // 11000001-11010010-00000001-10011000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d20198 + +udot za.d[w10, 1, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001-11011010-01000000-00011001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4019 + +udot za.d[w10, 1], {z0.h, z1.h}, z10.h[0] // 11000001-11011010-01000000-00011001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1da4019 + +udot za.d[w8, 5, vgx2], {z22.h, z23.h}, z14.h[0] // 11000001-11011110-00000010-11011101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h[0] +// CHECK-ENCODING: [0xdd,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02dd + +udot za.d[w8, 5], {z22.h, z23.h}, z14.h[0] // 11000001-11011110-00000010-11011101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h[0] +// CHECK-ENCODING: [0xdd,0x02,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de02dd + +udot za.d[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-11010001-01100101-00011010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1651a + +udot za.d[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-11010001-01100101-00011010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0x65,0xd1,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c1d1651a + +udot za.d[w9, 7, vgx2], {z12.h, z13.h}, z11.h[0] // 11000001-11011011-00100001-10011111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db219f + +udot za.d[w9, 7], {z12.h, z13.h}, z11.h[0] // 11000001-11011011-00100001-10011111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0x21,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1db219f + + +udot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701418 + +udot za.s[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701418 + +udot za.s[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x5d,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175555d + +udot za.s[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01011101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x5d,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175555d + +udot za.s[w11, 7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10111111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xbf,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875bf + +udot za.s[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10111111 +// CHECK-INST: udot 
za.s[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xbf,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875bf + +udot za.s[w11, 7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11111111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xff,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77ff + +udot za.s[w11, 7], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11111111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xff,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77ff + +udot za.s[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00111101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x3d,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c170163d + +udot za.s[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00111101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x3d,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c170163d + +udot za.s[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00111001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x39,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1439 + +udot za.s[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00111001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x39,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1439 + +udot za.s[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01111000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1745678 + +udot za.s[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01111000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745678 + +udot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721598 + +udot za.s[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721598 + +udot za.s[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00111001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5439 + +udot za.s[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00111001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5439 + +udot za.s[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xdd,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16dd + +udot za.s[w8, 5], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xdd,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16dd + +udot za.s[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00111010 +// CHECK-INST: udot za.s[w11, 2, 
vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x3a,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c171753a + +udot za.s[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00111010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x3a,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c171753a + +udot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x9f,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b359f + +udot za.s[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x9f,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b359f + + +udot za.s[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509010 + +udot za.s[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-01010000-10010000-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509010 + +udot za.s[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x15,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d515 + +udot za.s[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-01010101-11010101-00010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x15,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c155d515 + +udot za.s[w11, 7, vgx4], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x97,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd97 + +udot za.s[w11, 7], {z12.h - z15.h}, z8.h[3] // 11000001-01011000-11111101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.h - z15.h }, z8.h[3] +// CHECK-ENCODING: [0x97,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fd97 + +udot za.s[w11, 7, vgx4], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x97,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff97 + +udot za.s[w11, 7], {z28.h - z31.h}, z15.h[3] // 11000001-01011111-11111111-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.h - z31.h }, z15.h[3] +// CHECK-ENCODING: [0x97,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fff97 + +udot za.s[w8, 5, vgx4], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x15,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e15 + +udot za.s[w8, 5], {z16.h - z19.h}, z0.h[3] // 11000001-01010000-10011110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.h - z19.h }, z0.h[3] +// CHECK-ENCODING: [0x15,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e15 + +udot za.s[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x11,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9411 + +udot za.s[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-01011110-10010100-00010001 +// 
CHECK-INST: udot za.s[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x11,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9411 + +udot za.s[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00010000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x10,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d610 + +udot za.s[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-01010100-11010110-00010000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x10,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d610 + +udot za.s[w8, 0, vgx4], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x90,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529990 + +udot za.s[w8, 0], {z12.h - z15.h}, z2.h[2] // 11000001-01010010-10011001-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, z2.h[2] +// CHECK-ENCODING: [0x90,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1529990 + +udot za.s[w10, 1, vgx4], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x11,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad811 + +udot za.s[w10, 1], {z0.h - z3.h}, z10.h[2] // 11000001-01011010-11011000-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.h - z3.h }, z10.h[2] +// CHECK-ENCODING: [0x11,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad811 + +udot za.s[w8, 5, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x95,0x9a,0x5e,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a95 + +udot za.s[w8, 5], {z20.h - z23.h}, z14.h[2] // 11000001-01011110-10011010-10010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x95,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9a95 + +udot za.s[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x12,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f512 + +udot za.s[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-01010001-11110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x12,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f512 + +udot za.s[w9, 7, vgx4], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x97,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb997 + +udot za.s[w9, 7], {z12.h - z15.h}, z11.h[2] // 11000001-01011011-10111001-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, z11.h[2] +// CHECK-ENCODING: [0x97,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb997 + + +udot za.s[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11418 + +udot za.s[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11418 + +udot za.s[w10, 5, 
vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00011101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x1d,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5551d + +udot za.s[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00011101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x1d,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5551d + +udot za.s[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x9f,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9759f + +udot za.s[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x9f,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9759f + +udot za.s[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9f,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd779f + +udot za.s[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10011111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9f,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd779f + +udot za.s[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x1d,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f1161d + 
+udot za.s[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x1d,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f1161d + +udot za.s[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00011001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1419 + +udot za.s[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00011001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1419 + +udot za.s[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00011000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55618 + +udot za.s[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00011000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55618 + +udot za.s[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11598 + +udot za.s[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10011000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11598 + +udot 
za.s[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00011001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95419 + +udot za.s[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00011001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95419 + +udot za.s[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9d,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd169d + +udot za.s[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10011101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9d,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd169d + +udot za.s[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00011010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x1a,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e1751a + +udot za.s[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00011010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x1a,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e1751a + +udot za.s[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x9f,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9359f + +udot 
za.s[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10011111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x9f,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e9359f + + +udot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00110000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509030 + +udot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00110000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509030 + +udot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00110101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x35,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d535 + +udot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00110101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x35,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d535 + +udot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdb7 + +udot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdb7 + +udot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10110111 +// CHECK-INST: udot 
za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xb7,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffb7 + +udot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xb7,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffb7 + +udot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e35 + +udot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e35 + +udot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9431 + +udot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9431 + +udot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00110000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x30,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d630 + +udot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00110000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x30,0xd6,0x54,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c154d630 + +udot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10110000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299b0 + +udot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10110000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299b0 + +udot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad831 + +udot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad831 + +udot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xb5,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9ab5 + +udot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xb5,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9ab5 + +udot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f532 + +udot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 
11000001-01010001-11110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f532 + +udot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10110111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9b7 + +udot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10110111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9b7 + + +udot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10000000-00011000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08018 + +udot za.d[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10000000-00011000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08018 + +udot za.d[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11000101-00011101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c51d + +udot za.d[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11000101-00011101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xc5,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5c51d + +udot za.d[w11, 7, vgx4], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11100101-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// 
CHECK-ENCODING: [0x9f,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e59f + +udot za.d[w11, 7], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11100101-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x9f,0xe5,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8e59f + +udot za.d[w11, 7, vgx4], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11100111-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x9f,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe79f + +udot za.d[w11, 7], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11100111-10011111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x9f,0xe7,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfe79f + +udot za.d[w8, 5, vgx4], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10000110-00011101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0861d + +udot za.d[w8, 5], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10000110-00011101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x86,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d0861d + +udot za.d[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10000100-00011001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8419 + +udot za.d[w8, 1], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10000100-00011001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x84,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8419 + 
+udot za.d[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11000110-00011000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c618 + +udot za.d[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11000110-00011000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xc6,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4c618 + +udot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10000001-10011000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28198 + +udot za.d[w8, 0], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10000001-10011000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28198 + +udot za.d[w10, 1, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11000000-00011001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac019 + +udot za.d[w10, 1], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11000000-00011001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc0,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac019 + +udot za.d[w8, 5, vgx4], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10000010-10011101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x9d,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de829d + +udot za.d[w8, 5], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10000010-10011101 +// CHECK-INST: udot 
za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x9d,0x82,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de829d + +udot za.d[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11100101-00011010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e51a + +udot za.d[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11100101-00011010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0xe5,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1e51a + +udot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10100001-10011111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba19f + +udot za.d[w9, 7], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10100001-10011111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0xa1,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba19f + + +udot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201410 + +udot za.s[w8, 0], {z0.b - z1.b}, z0.b // 11000001-00100000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201410 + +udot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x55,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1255555 + +udot za.s[w10, 5], {z10.b - z11.b}, z5.b // 11000001-00100101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x55,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1255555 + +udot za.s[w11, 7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb7,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875b7 + +udot za.s[w11, 7], {z13.b - z14.b}, z8.b // 11000001-00101000-01110101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb7,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875b7 + +udot za.s[w11, 7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf7,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77f7 + +udot za.s[w11, 7], {z31.b - z0.b}, z15.b // 11000001-00101111-01110111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf7,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77f7 + +udot za.s[w8, 5, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x35,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201635 + +udot za.s[w8, 5], {z17.b - z18.b}, z0.b // 11000001-00100000-00010110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x35,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201635 + +udot za.s[w8, 1, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z1.b, 
z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1431 + +udot za.s[w8, 1], {z1.b - z2.b}, z14.b // 11000001-00101110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1431 + +udot za.s[w10, 0, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245670 + +udot za.s[w10, 0], {z19.b - z20.b}, z4.b // 11000001-00100100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245670 + +udot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221590 + +udot za.s[w8, 0], {z12.b - z13.b}, z2.b // 11000001-00100010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221590 + +udot za.s[w10, 1, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5431 + +udot za.s[w10, 1], {z1.b - z2.b}, z10.b // 11000001-00101010-01010100-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5431 + +udot za.s[w8, 5, vgx2], {z22.b, 
z23.b}, z14.b // 11000001-00101110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd5,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16d5 + +udot za.s[w8, 5], {z22.b - z23.b}, z14.b // 11000001-00101110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd5,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16d5 + +udot za.s[w11, 2, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x32,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217532 + +udot za.s[w11, 2], {z9.b - z10.b}, z1.b // 11000001-00100001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x32,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1217532 + +udot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x97,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3597 + +udot za.s[w9, 7], {z12.b - z13.b}, z11.b // 11000001-00101011-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x97,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b3597 + +udot za.s[w8, 0, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01410 + +udot za.s[w8, 0], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: 
[0x10,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01410 + +udot za.s[w10, 5, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x55,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45555 + +udot za.s[w10, 5], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x55,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45555 + +udot za.s[w11, 7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01110101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x97,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87597 + +udot za.s[w11, 7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01110101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x97,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a87597 + +udot za.s[w11, 7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01110111-11010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd7,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77d7 + +udot za.s[w11, 7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01110111-11010111 +// CHECK-INST: udot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd7,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77d7 + +udot za.s[w8, 5, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00010110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: 
[0x15,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01615 + +udot za.s[w8, 5], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00010110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x15,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b01615 + +udot za.s[w8, 1, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1411 + +udot za.s[w8, 1], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1411 + +udot za.s[w10, 0, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01010110-01010000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x50,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45650 + +udot za.s[w10, 0], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01010110-01010000 +// CHECK-INST: udot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x50,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45650 + +udot za.s[w8, 0, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x15,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21590 + +udot za.s[w8, 0], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x15,0xa2,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21590 + +udot za.s[w10, 1, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01010100-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5411 + +udot za.s[w10, 1], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01010100-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5411 + +udot za.s[w8, 5, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd5,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16d5 + +udot za.s[w8, 5], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd5,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16d5 + +udot za.s[w11, 2, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x12,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07512 + +udot za.s[w11, 2], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x12,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a07512 + +udot za.s[w9, 7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x97,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1aa3597 + +udot za.s[w9, 7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x97,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa3597 + + +udot za.d[w8, 0, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601410 + +udot za.d[w8, 0], {z0.h - z1.h}, z0.h // 11000001-01100000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x14,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601410 + +udot za.d[w10, 5, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x55,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1655555 + +udot za.d[w10, 5], {z10.h - z11.h}, z5.h // 11000001-01100101-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x55,0x55,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1655555 + +udot za.d[w11, 7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01110101-10110111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb7,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875b7 + +udot za.d[w11, 7], {z13.h - z14.h}, z8.h // 11000001-01101000-01110101-10110111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb7,0x75,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16875b7 + +udot za.d[w11, 7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01110111-11110111 +// CHECK-INST: 
udot za.d[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf7,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77f7 + +udot za.d[w11, 7], {z31.h - z0.h}, z15.h // 11000001-01101111-01110111-11110111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf7,0x77,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f77f7 + +udot za.d[w8, 5, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00010110-00110101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x35,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601635 + +udot za.d[w8, 5], {z17.h - z18.h}, z0.h // 11000001-01100000-00010110-00110101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x35,0x16,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1601635 + +udot za.d[w8, 1, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00010100-00110001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1431 + +udot za.d[w8, 1], {z1.h - z2.h}, z14.h // 11000001-01101110-00010100-00110001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x14,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e1431 + +udot za.d[w10, 0, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01010110-01110000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645670 + +udot za.d[w10, 0], {z19.h - z20.h}, z4.h // 11000001-01100100-01010110-01110000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x56,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1645670 + +udot 
za.d[w8, 0, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621590 + +udot za.d[w8, 0], {z12.h - z13.h}, z2.h // 11000001-01100010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x15,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1621590 + +udot za.d[w10, 1, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01010100-00110001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5431 + +udot za.d[w10, 1], {z1.h - z2.h}, z10.h // 11000001-01101010-01010100-00110001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x54,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a5431 + +udot za.d[w8, 5, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd5,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16d5 + +udot za.d[w8, 5], {z22.h - z23.h}, z14.h // 11000001-01101110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd5,0x16,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e16d5 + +udot za.d[w11, 2, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01110101-00110010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x32,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1617532 + +udot za.d[w11, 2], {z9.h - z10.h}, z1.h // 11000001-01100001-01110101-00110010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: 
[0x32,0x75,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1617532 + +udot za.d[w9, 7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x97,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b3597 + +udot za.d[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x97,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b3597 + +udot za.d[w9, 7], {z12.h - z13.h}, z11.h // 11000001-01101011-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x97,0x35,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b3597 + + +udot za.d[w8, 0, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01410 + +udot za.d[w8, 0], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x14,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e01410 + +udot za.d[w10, 5, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x55,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45555 + +udot za.d[w10, 5], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x55,0x55,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1f45555 + +udot za.d[w11, 7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01110101-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x97,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e87597 + +udot za.d[w11, 7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01110101-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x97,0x75,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e87597 + +udot za.d[w11, 7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01110111-11010111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd7,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77d7 + +udot za.d[w11, 7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01110111-11010111 +// CHECK-INST: udot za.d[w11, 7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd7,0x77,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe77d7 + +udot za.d[w8, 5, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00010110-00010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x15,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f01615 + +udot za.d[w8, 5], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00010110-00010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x15,0x16,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f01615 + +udot za.d[w8, 1, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00010100-00010001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1411 + 
+udot za.d[w8, 1], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00010100-00010001 +// CHECK-INST: udot za.d[w8, 1, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x14,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe1411 + +udot za.d[w10, 0, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01010110-01010000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45650 + +udot za.d[w10, 0], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01010110-01010000 +// CHECK-INST: udot za.d[w10, 0, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x56,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f45650 + +udot za.d[w8, 0, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21590 + +udot za.d[w8, 0], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x15,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e21590 + +udot za.d[w10, 1, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01010100-00010001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5411 + +udot za.d[w10, 1], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01010100-00010001 +// CHECK-INST: udot za.d[w10, 1, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x54,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa5411 + +udot za.d[w8, 5, vgx2], 
{z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd5,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16d5 + +udot za.d[w8, 5], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd5,0x16,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe16d5 + +udot za.d[w11, 2, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01110101-00010010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x12,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e07512 + +udot za.d[w11, 2], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01110101-00010010 +// CHECK-INST: udot za.d[w11, 2, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x12,0x75,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e07512 + +udot za.d[w9, 7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x97,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea3597 + +udot za.d[w9, 7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x97,0x35,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea3597 + + +udot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301410 + +udot za.s[w8, 0], {z0.b - z3.b}, z0.b // 
11000001-00110000-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301410 + +udot za.s[w10, 5, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x55,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355555 + +udot za.s[w10, 5], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x55,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1355555 + +udot za.s[w11, 7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb7,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875b7 + +udot za.s[w11, 7], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb7,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875b7 + +udot za.s[w11, 7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf7,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77f7 + +udot za.s[w11, 7], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11110111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf7,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77f7 + +udot za.s[w8, 5, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: 
[0x35,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301635 + +udot za.s[w8, 5], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00110101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x35,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301635 + +udot za.s[w8, 1, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1431 + +udot za.s[w8, 1], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00110001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1431 + +udot za.s[w10, 0, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345670 + +udot za.s[w10, 0], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01110000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345670 + +udot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321590 + +udot za.s[w8, 0], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321590 + +udot za.s[w10, 1, vgx4], {z1.b - z4.b}, z10.b // 
11000001-00111010-01010100-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5431 + +udot za.s[w10, 1], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00110001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5431 + +udot za.s[w8, 5, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd5,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16d5 + +udot za.s[w8, 5], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd5,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16d5 + +udot za.s[w11, 2, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x32,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317532 + +udot za.s[w11, 2], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00110010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x32,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1317532 + +udot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x97,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3597 + +udot za.s[w9, 7], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x97,0x35,0x3b,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b3597 + + +udot za.s[w8, 0, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x14,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11410 + +udot za.s[w8, 0], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x14,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11410 + +udot za.s[w10, 5, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x15,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55515 + +udot za.s[w10, 5], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00010101 +// CHECK-INST: udot za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x15,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55515 + +udot za.s[w11, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x97,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97597 + +udot za.s[w11, 7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x97,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a97597 + +udot za.s[w11, 7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x97,0x77,0xbd,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7797 + +udot za.s[w11, 7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10010111 +// CHECK-INST: udot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x97,0x77,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd7797 + +udot za.s[w8, 5, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00010110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x15,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11615 + +udot za.s[w8, 5], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00010110-00010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x15,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b11615 + +udot za.s[w8, 1, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1411 + +udot za.s[w8, 1], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00010001 +// CHECK-INST: udot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1411 + +udot za.s[w10, 0, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00010000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x56,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55610 + +udot za.s[w10, 0], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00010000 +// CHECK-INST: udot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x56,0xb5,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55610 + +udot za.s[w8, 0, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11590 +udot za.s[w8, 0], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10010000 +// CHECK-INST: udot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11590 + +udot za.s[w10, 1, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01010100-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95411 + +udot za.s[w10, 1], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01010100-00010001 +// CHECK-INST: udot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95411 + +udot za.s[w8, 5, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x95,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1695 + +udot za.s[w8, 5], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10010101 +// CHECK-INST: udot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x95,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1695 + +udot za.s[w11, 2, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x12,0x75,0xa1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17512 + +udot za.s[w11, 2], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00010010 +// CHECK-INST: udot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x12,0x75,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a17512 + +udot za.s[w9, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x97,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93597 + +udot za.s[w9, 7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10010111 +// CHECK-INST: udot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x97,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a93597 + + +udot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701410 + +udot za.d[w8, 0], {z0.h - z3.h}, z0.h // 11000001-01110000-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x14,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701410 + +udot za.d[w10, 5, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x55,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1755555 + +udot za.d[w10, 5], {z10.h - z13.h}, z5.h // 11000001-01110101-01010101-01010101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x55,0x55,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1755555 + +udot za.d[w11, 7, vgx4], 
{z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10110111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb7,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875b7 + +udot za.d[w11, 7], {z13.h - z16.h}, z8.h // 11000001-01111000-01110101-10110111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb7,0x75,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17875b7 + +udot za.d[w11, 7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11110111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf7,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77f7 + +udot za.d[w11, 7], {z31.h - z2.h}, z15.h // 11000001-01111111-01110111-11110111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf7,0x77,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f77f7 + +udot za.d[w8, 5, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00110101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x35,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701635 + +udot za.d[w8, 5], {z17.h - z20.h}, z0.h // 11000001-01110000-00010110-00110101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x35,0x16,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1701635 + +udot za.d[w8, 1, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00110001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1431 + +udot za.d[w8, 1], {z1.h - z4.h}, z14.h // 11000001-01111110-00010100-00110001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z1.h - z4.h }, z14.h +// 
CHECK-ENCODING: [0x31,0x14,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e1431 + +udot za.d[w10, 0, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01110000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745670 + +udot za.d[w10, 0], {z19.h - z22.h}, z4.h // 11000001-01110100-01010110-01110000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x56,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1745670 + +udot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721590 + +udot za.d[w8, 0], {z12.h - z15.h}, z2.h // 11000001-01110010-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x15,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1721590 + +udot za.d[w10, 1, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00110001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5431 + +udot za.d[w10, 1], {z1.h - z4.h}, z10.h // 11000001-01111010-01010100-00110001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x54,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a5431 + +udot za.d[w8, 5, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd5,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16d5 + +udot za.d[w8, 5], {z22.h - z25.h}, 
z14.h // 11000001-01111110-00010110-11010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd5,0x16,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e16d5 + +udot za.d[w11, 2, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00110010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x32,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1717532 + +udot za.d[w11, 2], {z9.h - z12.h}, z1.h // 11000001-01110001-01110101-00110010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x32,0x75,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1717532 + +udot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x97,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b3597 + +udot za.d[w9, 7], {z12.h - z15.h}, z11.h // 11000001-01111011-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x97,0x35,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b3597 + +udot za.d[w8, 0, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11410 + +udot za.d[w8, 0], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00010100-00010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x14,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11410 + +udot za.d[w10, 5, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00010101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z8.h - z11.h }, { 
z20.h - z23.h } +// CHECK-ENCODING: [0x15,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55515 + +udot za.d[w10, 5], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01010101-00010101 +// CHECK-INST: udot za.d[w10, 5, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x15,0x55,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55515 + +udot za.d[w11, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e97597 + +udot za.d[w11, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01110101-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x75,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e97597 + +udot za.d[w11, 7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x97,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd7797 + +udot za.d[w11, 7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01110111-10010111 +// CHECK-INST: udot za.d[w11, 7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x97,0x77,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd7797 + +udot za.d[w8, 5, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x15,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f11615 + +udot za.d[w8, 5], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00010110-00010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z16.h - 
z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x15,0x16,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f11615 + +udot za.d[w8, 1, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00010001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1411 + +udot za.d[w8, 1], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00010100-00010001 +// CHECK-INST: udot za.d[w8, 1, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x14,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1411 + +udot za.d[w10, 0, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00010000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55610 +udot za.d[w10, 0], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01010110-00010000 +// CHECK-INST: udot za.d[w10, 0, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x56,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f55610 + +udot za.d[w8, 0, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11590 + +udot za.d[w8, 0], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00010101-10010000 +// CHECK-INST: udot za.d[w8, 0, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x15,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e11590 + +udot za.d[w10, 1, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00010001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z0.h - 
z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95411 + +udot za.d[w10, 1], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01010100-00010001 +// CHECK-INST: udot za.d[w10, 1, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x54,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f95411 + +udot za.d[w8, 5, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x95,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1695 + +udot za.d[w8, 5], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00010110-10010101 +// CHECK-INST: udot za.d[w8, 5, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x95,0x16,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd1695 + +udot za.d[w11, 2, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00010010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x12,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e17512 + +udot za.d[w11, 2], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01110101-00010010 +// CHECK-INST: udot za.d[w11, 2, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x12,0x75,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e17512 + +udot za.d[w9, 7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e93597 + +udot za.d[w9, 7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00110101-10010111 +// CHECK-INST: udot za.d[w9, 7, vgx4], { z12.h - z15.h 
}, { z8.h - z11.h } +// CHECK-ENCODING: [0x97,0x35,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e93597 + + diff --git a/llvm/test/MC/AArch64/SME2/umax-diagnostics.s b/llvm/test/MC/AArch64/SME2/umax-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umax-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umax {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umax {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umax {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: umax {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +umax {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umax {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umax {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umax {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umax.s b/llvm/test/MC/AArch64/SME2/umax.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umax.s @@ -0,0 +1,413 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// 
RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umax {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100000-00000001 +// CHECK-INST: umax { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x01,0xa0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a001 + +umax {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100000-00010101 +// CHECK-INST: umax { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x15,0xa0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a015 + +umax {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100000-00010111 +// CHECK-INST: umax { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x17,0xa0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a017 + +umax {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100000-00011111 +// CHECK-INST: umax { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x1f,0xa0,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa01f + + +umax {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110000-00000001 +// CHECK-INST: umax { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xb0,0x60,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c160b001 + +umax {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110000-00010101 +// CHECK-INST: umax { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x15,0xb0,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b015 + +umax {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110000-00010111 +// CHECK-INST: umax { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x17,0xb0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b017 + +umax {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110000-00011111 +// CHECK-INST: umax { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x1f,0xb0,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb01f + + +umax {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100000-00000001 +// CHECK-INST: umax { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x01,0xa0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a001 + +umax {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100000-00010101 +// CHECK-INST: umax { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x15,0xa0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a015 + +umax {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100000-00010111 +// CHECK-INST: umax { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x17,0xa0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a017 + +umax {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100000-00011111 +// CHECK-INST: umax { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x1f,0xa0,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa01f + + +umax {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 
11000001-10100000-10110000-00000001 +// CHECK-INST: umax { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xb0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b001 + +umax {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110000-00010101 +// CHECK-INST: umax { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x15,0xb0,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b015 + +umax {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110000-00010111 +// CHECK-INST: umax { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x17,0xb0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b017 + +umax {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110000-00011111 +// CHECK-INST: umax { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x1f,0xb0,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb01f + + +umax {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100000-00000001 +// CHECK-INST: umax { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x01,0xa0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a001 + +umax {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100000-00010101 +// CHECK-INST: umax { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x15,0xa0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a015 + +umax {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100000-00010111 +// CHECK-INST: umax { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x17,0xa0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a017 + +umax {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100000-00011111 +// CHECK-INST: umax { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// 
CHECK-ENCODING: [0x1f,0xa0,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa01f + + +umax {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110000-00000001 +// CHECK-INST: umax { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x01,0xb0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b001 + +umax {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110000-00010101 +// CHECK-INST: umax { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x15,0xb0,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b015 + +umax {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110000-00010111 +// CHECK-INST: umax { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x17,0xb0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b017 + +umax {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110000-00011111 +// CHECK-INST: umax { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x1f,0xb0,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb01f + + +umax {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100000-00000001 +// CHECK-INST: umax { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x01,0xa0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a001 + +umax {z20.b, z21.b}, {z20.b, z21.b}, z5.b // 11000001-00100101-10100000-00010101 +// CHECK-INST: umax { z20.b, z21.b }, { z20.b, z21.b }, z5.b +// CHECK-ENCODING: [0x15,0xa0,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a015 + +umax {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100000-00010111 +// CHECK-INST: umax { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x17,0xa0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a017 + 
+umax {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100000-00011111 +// CHECK-INST: umax { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x1f,0xa0,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa01f + + +umax {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110000-00000001 +// CHECK-INST: umax { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xb0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b001 + +umax {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110000-00010101 +// CHECK-INST: umax { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x15,0xb0,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b015 + +umax {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110000-00010111 +// CHECK-INST: umax { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x17,0xb0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b017 + +umax {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110000-00011111 +// CHECK-INST: umax { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x1f,0xb0,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb01f + + +umax {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101000-00000001 +// CHECK-INST: umax { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x01,0xa8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a801 + +umax {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101000-00010101 +// CHECK-INST: umax { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x15,0xa8,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a815 + +umax {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101000-00010101 +// CHECK-INST: 
umax { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x15,0xa8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a815 + +umax {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101000-00011101 +// CHECK-INST: umax { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x1d,0xa8,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa81d + + +umax {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111000-00000001 +// CHECK-INST: umax { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x01,0xb8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b801 + +umax {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111000-00010101 +// CHECK-INST: umax { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x15,0xb8,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b815 + +umax {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111000-00010101 +// CHECK-INST: umax { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x15,0xb8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b815 + +umax {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111000-00011101 +// CHECK-INST: umax { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x1d,0xb8,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb81d + + +umax {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101000-00000001 +// CHECK-INST: umax { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x01,0xa8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a801 + +umax {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101000-00010101 +// CHECK-INST: umax { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: 
[0x15,0xa8,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a815 + +umax {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101000-00010101 +// CHECK-INST: umax { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x15,0xa8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a815 + +umax {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101000-00011101 +// CHECK-INST: umax { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x1d,0xa8,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa81d + + +umax {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111000-00000001 +// CHECK-INST: umax { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x01,0xb8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b801 + +umax {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111000-00010101 +// CHECK-INST: umax { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x15,0xb8,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b815 + +umax {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111000-00010101 +// CHECK-INST: umax { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x15,0xb8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b815 + +umax {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111000-00011101 +// CHECK-INST: umax { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x1d,0xb8,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb81d + + +umax {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101000-00000001 +// CHECK-INST: umax { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x01,0xa8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1e0a801 + +umax {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101000-00010101 +// CHECK-INST: umax { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x15,0xa8,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a815 + +umax {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101000-00010101 +// CHECK-INST: umax { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x15,0xa8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a815 + +umax {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101000-00011101 +// CHECK-INST: umax { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x1d,0xa8,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa81d + + +umax {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111000-00000001 +// CHECK-INST: umax { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x01,0xb8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b801 + +umax {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111000-00010101 +// CHECK-INST: umax { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x15,0xb8,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b815 + +umax {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111000-00010101 +// CHECK-INST: umax { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x15,0xb8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b815 + +umax {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111000-00011101 +// CHECK-INST: umax { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x1d,0xb8,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb81d + + +umax {z0.b - z3.b}, {z0.b - z3.b}, 
z0.b // 11000001-00100000-10101000-00000001 +// CHECK-INST: umax { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x01,0xa8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a801 + +umax {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101000-00010101 +// CHECK-INST: umax { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x15,0xa8,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a815 + +umax {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101000-00010101 +// CHECK-INST: umax { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x15,0xa8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a815 + +umax {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101000-00011101 +// CHECK-INST: umax { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x1d,0xa8,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa81d + + +umax {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111000-00000001 +// CHECK-INST: umax { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x01,0xb8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b801 + +umax {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111000-00010101 +// CHECK-INST: umax { z20.b - z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x15,0xb8,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b815 + +umax {z20.b - z23.b}, {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00101000-10111000-00010101 +// CHECK-INST: umax { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x15,0xb8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b815 + +umax {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111000-00011101 +// CHECK-INST: umax { z28.b - z31.b 
}, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x1d,0xb8,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cb81d + diff --git a/llvm/test/MC/AArch64/SME2/umin-diagnostics.s b/llvm/test/MC/AArch64/SME2/umin-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umin-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umin {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umin {z0.h, z1.h}, {z0.h-z2.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umin {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element type +// CHECK-NEXT: umin {z1.d-z2.d}, {z0.d, z1.d}, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid single register + +umin {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umin {z0.b, z1.b}, {z2.b-z3.b}, z31.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umin {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umin {z0.b, z1.b}, {z2.b-z3.b}, z14.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umin.s b/llvm/test/MC/AArch64/SME2/umin.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umin.s @@ -0,0 +1,413 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding 
-mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umin {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100000-00100001 +// CHECK-INST: umin { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x21,0xa0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a021 + +umin {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100000-00110101 +// CHECK-INST: umin { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x35,0xa0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a035 + +umin {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100000-00110111 +// CHECK-INST: umin { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x37,0xa0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a037 + +umin {z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100000-00111111 +// CHECK-INST: umin { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3f,0xa0,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa03f + + +umin {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110000-00100001 +// CHECK-INST: umin { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x21,0xb0,0x60,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b021 + +umin {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110000-00110101 +// CHECK-INST: umin { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x35,0xb0,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b035 + +umin {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110000-00110111 +// CHECK-INST: umin { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x37,0xb0,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b037 + +umin {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110000-00111111 +// CHECK-INST: umin { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3f,0xb0,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb03f + + +umin {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100000-00100001 +// CHECK-INST: umin { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x21,0xa0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a021 + +umin {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100000-00110101 +// CHECK-INST: umin { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x35,0xa0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a035 + +umin {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100000-00110111 +// CHECK-INST: umin { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x37,0xa0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a037 + +umin {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100000-00111111 +// CHECK-INST: umin { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3f,0xa0,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa03f + + +umin {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 
11000001-10100000-10110000-00100001 +// CHECK-INST: umin { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x21,0xb0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b021 + +umin {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110000-00110101 +// CHECK-INST: umin { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x35,0xb0,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b035 + +umin {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110000-00110111 +// CHECK-INST: umin { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x37,0xb0,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b037 + +umin {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110000-00111111 +// CHECK-INST: umin { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3f,0xb0,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb03f + + +umin {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100000-00100001 +// CHECK-INST: umin { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x21,0xa0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a021 + +umin {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100000-00110101 +// CHECK-INST: umin { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x35,0xa0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a035 + +umin {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100000-00110111 +// CHECK-INST: umin { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x37,0xa0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a037 + +umin {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100000-00111111 +// CHECK-INST: umin { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// 
CHECK-ENCODING: [0x3f,0xa0,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa03f + + +umin {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110000-00100001 +// CHECK-INST: umin { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x21,0xb0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b021 + +umin {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110000-00110101 +// CHECK-INST: umin { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x35,0xb0,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b035 + +umin {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110000-00110111 +// CHECK-INST: umin { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x37,0xb0,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b037 + +umin {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110000-00111111 +// CHECK-INST: umin { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3f,0xb0,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb03f + + +umin {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100000-00100001 +// CHECK-INST: umin { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x21,0xa0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a021 + +umin {z20.b, z21.b}, {z20.b, z21.b}, z5.b // 11000001-00100101-10100000-00110101 +// CHECK-INST: umin { z20.b, z21.b }, { z20.b, z21.b }, z5.b +// CHECK-ENCODING: [0x35,0xa0,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a035 + +umin {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100000-00110111 +// CHECK-INST: umin { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x37,0xa0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a037 + 
+umin {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100000-00111111 +// CHECK-INST: umin { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x3f,0xa0,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa03f + + +umin {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110000-00100001 +// CHECK-INST: umin { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x21,0xb0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b021 + +umin {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110000-00110101 +// CHECK-INST: umin { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x35,0xb0,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b035 + +umin {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110000-00110111 +// CHECK-INST: umin { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x37,0xb0,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b037 + +umin {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110000-00111111 +// CHECK-INST: umin { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x3f,0xb0,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb03f + + +umin {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101000-00100001 +// CHECK-INST: umin { z0.h - z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x21,0xa8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a821 + +umin {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101000-00110101 +// CHECK-INST: umin { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x35,0xa8,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a835 + +umin {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101000-00110101 +// CHECK-INST: 
umin { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x35,0xa8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a835 + +umin {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101000-00111101 +// CHECK-INST: umin { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3d,0xa8,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa83d + + +umin {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111000-00100001 +// CHECK-INST: umin { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x21,0xb8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b821 + +umin {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111000-00110101 +// CHECK-INST: umin { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x35,0xb8,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b835 + +umin {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111000-00110101 +// CHECK-INST: umin { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x35,0xb8,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b835 + +umin {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111000-00111101 +// CHECK-INST: umin { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x3d,0xb8,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cb83d + + +umin {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101000-00100001 +// CHECK-INST: umin { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x21,0xa8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a821 + +umin {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101000-00110101 +// CHECK-INST: umin { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: 
[0x35,0xa8,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a835 + +umin {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101000-00110101 +// CHECK-INST: umin { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x35,0xa8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a835 + +umin {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101000-00111101 +// CHECK-INST: umin { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3d,0xa8,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa83d + + +umin {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111000-00100001 +// CHECK-INST: umin { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x21,0xb8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b821 + +umin {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111000-00110101 +// CHECK-INST: umin { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x35,0xb8,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b835 + +umin {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111000-00110101 +// CHECK-INST: umin { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x35,0xb8,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b835 + +umin {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111000-00111101 +// CHECK-INST: umin { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3d,0xb8,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcb83d + + +umin {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101000-00100001 +// CHECK-INST: umin { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x21,0xa8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1e0a821 + +umin {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101000-00110101 +// CHECK-INST: umin { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x35,0xa8,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a835 + +umin {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101000-00110101 +// CHECK-INST: umin { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x35,0xa8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a835 + +umin {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101000-00111101 +// CHECK-INST: umin { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3d,0xa8,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa83d + + +umin {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111000-00100001 +// CHECK-INST: umin { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x21,0xb8,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b821 + +umin {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111000-00110101 +// CHECK-INST: umin { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x35,0xb8,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b835 + +umin {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11101000-10111000-00110101 +// CHECK-INST: umin { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x35,0xb8,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b835 + +umin {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111000-00111101 +// CHECK-INST: umin { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3d,0xb8,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcb83d + + +umin {z0.b - z3.b}, {z0.b - z3.b}, 
z0.b // 11000001-00100000-10101000-00100001 +// CHECK-INST: umin { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x21,0xa8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a821 + +umin {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101000-00110101 +// CHECK-INST: umin { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x35,0xa8,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a835 + +umin {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101000-00110101 +// CHECK-INST: umin { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x35,0xa8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a835 + +umin {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101000-00111101 +// CHECK-INST: umin { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x3d,0xa8,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa83d + + +umin {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111000-00100001 +// CHECK-INST: umin { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x21,0xb8,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b821 + +umin {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111000-00110101 +// CHECK-INST: umin { z20.b - z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x35,0xb8,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b835 + +umin {z20.b - z23.b}, {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00101000-10111000-00110101 +// CHECK-INST: umin { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x35,0xb8,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b835 + +umin {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111000-00111101 +// CHECK-INST: umin { z28.b - z31.b 
}, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x3d,0xb8,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cb83d + diff --git a/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s b/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s @@ -0,0 +1,74 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: umlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: umlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +umlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: umlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid vector select register + +umlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: umlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w12, 6:7], z12.h, z16.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: umlall za.d[w12, 6:7], z12.h, z16.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +umlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. +// CHECK-NEXT: umlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: umlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +umlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: umlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s-e b/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlall-diagnostics.s-e @@ -0,0 +1,74 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: umlall za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: umlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +umlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: umlall za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid vector select register + +umlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w12, 6:7], z12.h, z16.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.d[w12, 6:7], z12.h, z16.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +umlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. +// CHECK-NEXT: umlall za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: umlall za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +umlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15].
+// CHECK-NEXT: umlall za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umlall.s b/llvm/test/MC/AArch64/SME2/umlall.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlall.s @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00010000 +// CHECK-INST: umlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x10,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200410 + +umlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01010001 +// CHECK-INST: umlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x51,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254551 + +umlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10110011 +// CHECK-INST: umlall za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xb3,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865b3 + +umlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11110011 +// CHECK-INST: umlall za.s[w11, 
12:15], z31.b, z15.b +// CHECK-ENCODING: [0xf3,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67f3 + +umlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x31,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200631 + +umlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x31,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0431 + +umlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01110000 +// CHECK-INST: umlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x70,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244670 + +umlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10010000 +// CHECK-INST: umlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x90,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220590 + +umlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00110001 +// CHECK-INST: umlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x31,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4431 + +umlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11010001 +// CHECK-INST: umlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xd1,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06d1 + +umlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00110010 +// CHECK-INST: umlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x32,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216532 + +umlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10010011 +// CHECK-INST: umlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x93,0x25,0x2b,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2593 + + +umlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x10,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000010 + +umlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01010001 +// CHECK-INST: umlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x51,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055551 + +umlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10110011 +// CHECK-INST: umlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xb3,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edb3 + +umlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11110011 +// CHECK-INST: umlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xf3,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ffff3 + +umlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x31,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e31 + +umlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x31,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8431 + +umlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01110000 +// CHECK-INST: umlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x70,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045670 + +umlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x90,0x19,0x02,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021990 + +umlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x31,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac831 + +umlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xd1,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ad1 + +umlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00110010 +// CHECK-INST: umlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x32,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f532 + +umlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10010011 +// CHECK-INST: umlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x93,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba993 + + +umlall za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00010000 +// CHECK-INST: umlall za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x10,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600410 + +umlall za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01010001 +// CHECK-INST: umlall za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x51,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654551 + +umlall za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10110011 +// CHECK-INST: umlall za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xb3,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865b3 + +umlall za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11110011 +// CHECK-INST: umlall za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xf3,0x67,0x6f,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c16f67f3 + +umlall za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x31,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600631 + +umlall za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x31,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0431 + +umlall za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01110000 +// CHECK-INST: umlall za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x70,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644670 + +umlall za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10010000 +// CHECK-INST: umlall za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x90,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620590 + +umlall za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00110001 +// CHECK-INST: umlall za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x31,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4431 + +umlall za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11010001 +// CHECK-INST: umlall za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xd1,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06d1 + +umlall za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00110010 +// CHECK-INST: umlall za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x32,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616532 + +umlall za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10010011 +// CHECK-INST: umlall za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x93,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2593 + + 
+umlall za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800010 + +umlall za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01010001 +// CHECK-INST: umlall za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x51,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854551 + +umlall za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10110011 +// CHECK-INST: umlall za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xb3,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edb3 + +umlall za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11110011 +// CHECK-INST: umlall za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xf3,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18feff3 + +umlall za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x31,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e31 + +umlall za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x31,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8431 + +umlall za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01110000 +// CHECK-INST: umlall za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x70,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844670 + +umlall za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x90,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820990 + +umlall 
za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x31,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac831 + +umlall za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xd1,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ad1 + +umlall za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00110010 +// CHECK-INST: umlall za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x32,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e532 + +umlall za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10010011 +// CHECK-INST: umlall za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x93,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba993 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200010 + +umlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254151 + +umlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x25,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254151 + +umlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b1 + +umlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b1 + +umlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f1 + +umlall za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f1 + +umlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200231 + +umlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200231 + +umlall za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0031 + +umlall 
za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0031 + +umlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244270 + +umlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244270 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220190 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220190 + +umlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4031 + +umlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4031 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00000010-11010001 
+// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d1 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d1 + +umlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216130 + +umlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216130 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2191 + +umlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2191 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-00010000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100010 + +umlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: 
[0x10,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001-00010101-01000101-01010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x55,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154555 + +umlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x55,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154555 + +umlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001-00011000-01101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d97 + +umlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d97 + +umlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001-00011111-01101111-11010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xd7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fd7 + +umlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xd7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fd7 + +umlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001-00010000-00001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x0e,0x10,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e15 + +umlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e15 + +umlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001-00011110-00000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0411 + +umlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0411 + +umlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001-00010100-01000110-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x50,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144650 + +umlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x50,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144650 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001-00010010-00001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120990 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1120990 + +umlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001-00011010-01001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4811 + +umlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4811 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001-00011110-00001010-11010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xd5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ad5 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xd5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ad5 + +umlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001-00010001-01100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116512 + +umlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116512 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001-00011011-00101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2997 + +umlall 
za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2997 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00010 + +umlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x51,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44151 + +umlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x51,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44151 + +umlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x91,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86191 + +umlall za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x91,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1a86191 + +umlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01100011-11010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d1 + +umlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d1 + +umlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x11,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00211 + +umlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x11,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00211 + +umlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0011 + +umlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0011 + +umlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01000010-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING:
[0x50,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44250 + +umlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x50,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44250 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20190 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20190 + +umlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4011 + +umlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4011 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d1 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], {
z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d1 + +umlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06110 + +umlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06110 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x91,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2191 + +umlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x91,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2191 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001-01100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001-01100101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], {
z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654151 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654151 + +umlall za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001-01101000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b1 + +umlall za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b1 + +umlall za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001-01101111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f1 + +umlall za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f1 + +umlall za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001-01100000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600231 + +umlall za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x60,0xc1] +// CHECK-ERROR:
instruction requires: sme2 +// CHECK-UNKNOWN: c1600231 + +umlall za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001-01101110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0031 + +umlall za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0031 + +umlall za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001-01100100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644270 + +umlall za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644270 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001-01100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620190 + +umlall za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001-01101010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4031 + +umlall za.d[w10, 4:7], {z1.h -
z2.h}, z10.h // 11000001-01101010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4031 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001-01101110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d1 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d1 + +umlall za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001-01100001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616130 + +umlall za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616130 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001-01101011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2191 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2191 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001-10010000-00000000-00010000 +// CHECK-INST: umlall
za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001-10010101-01000101-01010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x55,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954555 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x55,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954555 + +umlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001-10011000-01100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986597 + +umlall za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986597 + +umlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001-10011111-01100111-11010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xd7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67d7 + +umlall za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7]
+// CHECK-ENCODING: [0xd7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67d7 + +umlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001-10010000-00000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900615 + +umlall za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900615 + +umlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001-10011110-00000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0411 + +umlall za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0411 + +umlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001-10010100-01000110-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x50,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944650 + +umlall za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x50,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944650 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001-10010010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x01,0x92,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920190 + +umlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001-10011010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4011 + +umlall za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4011 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-10011110-00000010-11010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02d5 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02d5 + +umlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001-10010001-01100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916512 + +umlall za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1916512 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001-10011011-00100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2197 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2197 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001-11100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001-11110100-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44151 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44151 + +umlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001-11101000-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x91,0x61,0xe8,0xc1] +// CHECK-ERROR:
instruction requires: sme2 +// CHECK-UNKNOWN: c1e86191 + +umlall za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x91,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86191 + +umlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001-11111110-01100011-11010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d1 + +umlall za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d1 + +umlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001-11110000-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x11,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00211 + +umlall za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x11,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00211 + +umlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001-11111110-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0011 + +umlall za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +//
CHECK-ENCODING: [0x11,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0011 + +umlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001-11110100-01000010-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44250 + +umlall za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44250 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001-11100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20190 + +umlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001-11111010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4011 + +umlall za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4011 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001-11111110-00000010-11010001 +// CHECK-INST: umlall
za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d1 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d1 + +umlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001-11100000-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06110 + +umlall za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06110 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001-11101010-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x91,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2191 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x91,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2191 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00010000 +// CHECK-INST:
umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300010 + +umlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354151 + +umlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354151 + +umlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b1 + +umlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b1 + +umlall za.s[w11, 4:7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f1 + +umlall za.s[w11, 4:7], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f1 + +umlall za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// 
CHECK-ENCODING: [0x31,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300231 + +umlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300231 + +umlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0031 + +umlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0031 + +umlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344270 + +umlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344270 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320190 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1320190 + +umlall za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4031 + +umlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4031 + +umlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d1 + +umlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d1 + +umlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316130 + +umlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316130 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2191 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 
11000001-00111011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2191 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108010 + +umlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x15,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c515 + +umlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x15,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c515 + +umlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed97 + +umlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed97 + +umlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10010111 +// CHECK-INST: 
umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x97,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef97 + +umlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x97,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef97 + +umlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e15 + +umlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e15 + +umlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8411 + +umlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8411 + +umlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x10,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c610 + +umlall za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, 
z4.b[4] +// CHECK-ENCODING: [0x10,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c610 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128990 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128990 + +umlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac811 + +umlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac811 + +umlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x95,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a95 + +umlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x95,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a95 + +umlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0xe5,0x11,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e512 + +umlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e512 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba997 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba997 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10010 + +umlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x11,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54111 + +umlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: 
[0x11,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54111 + +umlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96191 + +umlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96191 + +umlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6391 + +umlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6391 + +umlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x11,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10211 + +umlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x11,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10211 + +umlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00010001 +// CHECK-INST: umlall 
za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0011 + +umlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0011 + +umlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54210 + +umlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54210 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10190 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10190 + +umlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94011 + +umlall za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 
11000001-10111001-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94011 + +umlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0291 + +umlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0291 + +umlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16110 + +umlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16110 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92191 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92191 + + +umlall 
za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700010 + +umlall za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754151 + +umlall za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754151 + +umlall za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b1 + +umlall za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b1 + +umlall za.d[w11, 4:7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f1 + +umlall za.d[w11, 4:7], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11110001 +// 
CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f1 + +umlall za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700231 + +umlall za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700231 + +umlall za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0031 + +umlall za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0031 + +umlall za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744270 + +umlall za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744270 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: 
[0x90,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720190 + +umlall za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4031 + +umlall za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4031 + +umlall za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d1 + +umlall za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d1 + +umlall za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716130 + +umlall za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716130 + 
+umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2191 + +umlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2191 + + +umlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908010 + +umlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x15,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c515 + +umlall za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x15,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c515 + +umlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e597 + +umlall za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 
11000001-10011000-11100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e597 + +umlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x97,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe797 + +umlall za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x97,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe797 + +umlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908615 + +umlall za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908615 + +umlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8411 + +umlall za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8411 + +umlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00010000 +// CHECK-INST: 
umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x10,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c610 + +umlall za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x10,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c610 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928190 + +umlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac011 + +umlall za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac011 + +umlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x95,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8295 + +umlall za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] 
+// CHECK-ENCODING: [0x95,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8295 + +umlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e512 + +umlall za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e512 + +umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba197 + +umlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba197 + + +umlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10010 + +umlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// 
CHECK-ENCODING: [0x11,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54111 + +umlall za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x11,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54111 + +umlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96191 + +umlall za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96191 + +umlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6391 + +umlall za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6391 + +umlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x11,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10211 + +umlall za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00010001 +// CHECK-INST: 
umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x11,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10211 + +umlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0011 + +umlall za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0011 + +umlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54210 + +umlall za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54210 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10190 + +umlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} 
// 11000001-11111001-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94011 + +umlall za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94011 + +umlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0291 + +umlall za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0291 + +umlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16110 + +umlall za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16110 + +umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92191 + 
+umlall za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92191 + diff --git a/llvm/test/MC/AArch64/SME2/umlall.s-e b/llvm/test/MC/AArch64/SME2/umlall.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlall.s-e @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00010000 +// CHECK-INST: umlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x10,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200410 + +umlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01010001 +// CHECK-INST: umlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x51,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254551 + +umlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10110011 +// CHECK-INST: umlall za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xb3,0x65,0x28,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865b3 + +umlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11110011 +// CHECK-INST: umlall za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xf3,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67f3 + +umlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x31,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200631 + +umlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x31,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0431 + +umlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01110000 +// CHECK-INST: umlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x70,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244670 + +umlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10010000 +// CHECK-INST: umlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x90,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220590 + +umlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00110001 +// CHECK-INST: umlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x31,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4431 + +umlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11010001 +// CHECK-INST: umlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xd1,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06d1 + +umlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00110010 +// CHECK-INST: umlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x32,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1216532 + +umlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10010011 +// CHECK-INST: umlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x93,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2593 + + +umlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x10,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000010 + +umlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01010001 +// CHECK-INST: umlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x51,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055551 + +umlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10110011 +// CHECK-INST: umlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xb3,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edb3 + +umlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11110011 +// CHECK-INST: umlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xf3,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ffff3 + +umlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x31,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e31 + +umlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00110001 +// CHECK-INST: umlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x31,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8431 + +umlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01110000 +// CHECK-INST: umlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x70,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045670 
+ +umlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x90,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021990 + +umlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x31,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac831 + +umlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xd1,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ad1 + +umlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00110010 +// CHECK-INST: umlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x32,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f532 + +umlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10010011 +// CHECK-INST: umlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x93,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba993 + + +umlall za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00010000 +// CHECK-INST: umlall za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x10,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600410 + +umlall za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01010001 +// CHECK-INST: umlall za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x51,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654551 + +umlall za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10110011 +// CHECK-INST: umlall za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xb3,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865b3 + +umlall za.d[w11, 
12:15], z31.h, z15.h // 11000001-01101111-01100111-11110011 +// CHECK-INST: umlall za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xf3,0x67,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f67f3 + +umlall za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x31,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600631 + +umlall za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x31,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0431 + +umlall za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01110000 +// CHECK-INST: umlall za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x70,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644670 + +umlall za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10010000 +// CHECK-INST: umlall za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x90,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620590 + +umlall za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00110001 +// CHECK-INST: umlall za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x31,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4431 + +umlall za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11010001 +// CHECK-INST: umlall za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xd1,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06d1 + +umlall za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00110010 +// CHECK-INST: umlall za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x32,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616532 + +umlall za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10010011 +// 
CHECK-INST: umlall za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x93,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2593 + + +umlall za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800010 + +umlall za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01010001 +// CHECK-INST: umlall za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x51,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854551 + +umlall za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10110011 +// CHECK-INST: umlall za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xb3,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edb3 + +umlall za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11110011 +// CHECK-INST: umlall za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xf3,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18feff3 + +umlall za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x31,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e31 + +umlall za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00110001 +// CHECK-INST: umlall za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x31,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8431 + +umlall za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01110000 +// CHECK-INST: umlall za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x70,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844670 + +umlall za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10010000 +// CHECK-INST: 
umlall za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x90,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820990 + +umlall za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x31,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac831 + +umlall za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xd1,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ad1 + +umlall za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00110010 +// CHECK-INST: umlall za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x32,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e532 + +umlall za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10010011 +// CHECK-INST: umlall za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x93,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba993 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200010 + +umlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254151 + +umlall za.s[w10, 4:7], {z10.b -
z11.b}, z5.b // 11000001-00100101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254151 + +umlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b1 + +umlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b1 + +umlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f1 + +umlall za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f1 + +umlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200231 + +umlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200231 + +umlall za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00110001 +// CHECK-INST: umlall
za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0031 + +umlall za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0031 + +umlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244270 + +umlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244270 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220190 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220190 + +umlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4031 + +umlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x2a,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4031 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d1 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d1 + +umlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216130 + +umlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216130 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2191 + +umlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2191 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100010 + +umlall
za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x55,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154555 + +umlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x55,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154555 + +umlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d97 + +umlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d97 + +umlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001, 00011111, 01101111, 11010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xd7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fd7 + +umlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xd7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fd7 + +umlall za.s[w8, 4:7, vgx2], {z16.b, z17.b},
z0.b[14] // 11000001, 00010000, 00001110, 00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e15 + +umlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e15 + +umlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0411 + +umlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0411 + +umlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x50,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144650 + +umlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x50,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144650 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120990 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10010000 +//
CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120990 + +umlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4811 + +umlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4811 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xd5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ad5 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xd5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ad5 + +umlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116512 + +umlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116512 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001, 00011011, 00101001, 10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], {
z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2997 + +umlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b2997 + + +umlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00010 + +umlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00010 + +umlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x51,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44151 + +umlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x51,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44151 + +umlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x91,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86191 + +umlall za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10010001 +// CHECK-INST:
umlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x91,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86191 + +umlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d1 + +umlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d1 + +umlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x11,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00211 + +umlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x11,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00211 + +umlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0011 + +umlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0011 + +umlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b,
z21.b} // 11000001, 10110100, 01000010, 01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x50,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44250 + +umlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x50,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44250 + +umlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20190 + +umlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20190 + +umlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4011 + +umlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x11,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4011 + +umlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN:
c1be02d1 + +umlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd1,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d1 + +umlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06110 + +umlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x10,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06110 + +umlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x91,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2191 + +umlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x91,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2191 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1600010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654151 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654151 + +umlall za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b1 + +umlall za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b1 + +umlall za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f1 + +umlall za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f1 + +umlall za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600231 + +umlall za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 
11000001-01100000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600231 + +umlall za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0031 + +umlall za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0031 + +umlall za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644270 + +umlall za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644270 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620190 + +umlall za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010, 01000000, 00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z1.h, 
z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4031 + +umlall za.d[w10, 4:7], {z1.h - z2.h}, z10.h // 11000001-01101010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4031 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d1 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d1 + +umlall za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616130 + +umlall za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616130 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2191 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c16b2191 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x55,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954555 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x55,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1954555 + +umlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986597 + +umlall za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1986597 + +umlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xd7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67d7 
+ +umlall za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xd7,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67d7 + +umlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900615 + +umlall za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900615 + +umlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0411 + +umlall za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0411 + +umlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x50,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944650 + +umlall za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x50,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944650 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, 
z2.h[0] // 11000001, 10010010, 00000001, 10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920190 + +umlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4011 + +umlall za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4011 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02d5 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xd5,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02d5 + +umlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916512 + +umlall za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00010010 +// 
CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1916512 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2197 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b2197 + + +umlall za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00010 + +umlall za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00010 + +umlall za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44151 + +umlall za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x51,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44151 + +umlall za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 
01100001, 10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x91,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86191 + +umlall za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x91,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86191 + +umlall za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d1 + +umlall za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d1 + +umlall za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x11,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00211 + +umlall za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x11,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00211 + +umlall za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0011 + +umlall za.d[w8, 
4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0011 + +umlall za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44250 + +umlall za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x50,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44250 + +umlall za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20190 + +umlall za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20190 + +umlall za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4011 + +umlall za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x11,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c1fa4011 + +umlall za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d1 + +umlall za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd1,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d1 + +umlall za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06110 + +umlall za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x10,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06110 + +umlall za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x91,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2191 + +umlall za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x91,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2191 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x30,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1300010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x10,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300010 + +umlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354151 + +umlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x51,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354151 + +umlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b1 + +umlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb1,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b1 + +umlall za.s[w11, 4:7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f1 + +umlall za.s[w11, 4:7], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11110001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf1,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f1 + +umlall za.s[w8, 
4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300231 + +umlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x31,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300231 + +umlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0031 + +umlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00110001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x31,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0031 + +umlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344270 + +umlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01110000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x70,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344270 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320190 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 
0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x90,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320190 + +umlall za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4031 + +umlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00110001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x31,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4031 + +umlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d1 + +umlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd1,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d1 + +umlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316130 + +umlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00110000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x30,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316130 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x3b,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2191 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x91,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2191 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x10,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108010 + +umlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x15,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c515 + +umlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00010101 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x15,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c515 + +umlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed97 + +umlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x97,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c118ed97 + +umlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x97,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef97 + +umlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10010111 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x97,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef97 + +umlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e15 + +umlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x15,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e15 + +umlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8411 + +umlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x11,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8411 + +umlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x10,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c610 + +umlall za.s[w10, 0:3], {z16.b - 
z19.b}, z4.b[4] // 11000001-00010100-11000110-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x10,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c610 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128990 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x90,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128990 + +umlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac811 + +umlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x11,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac811 + +umlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x95,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a95 + +umlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10010101 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x95,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a95 + +umlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 
11000001-00010001-11100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e512 + +umlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00010010 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x12,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e512 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba997 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10010111 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x97,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba997 + + +umlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10010 + +umlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10010 + +umlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x11,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54111 + +umlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 
11000001-10110101-01000001-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x11,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54111 + +umlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96191 + +umlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96191 + +umlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6391 + +umlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10010001 +// CHECK-INST: umlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6391 + +umlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x11,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10211 + +umlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x11,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1b10211 + +umlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0011 + +umlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x11,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0011 + +umlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54210 + +umlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00010000 +// CHECK-INST: umlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x10,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54210 + +umlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10190 + +umlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10010000 +// CHECK-INST: umlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x90,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10190 + +umlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x40,0xb9,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94011 + +umlall za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00010001 +// CHECK-INST: umlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x11,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94011 + +umlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0291 + +umlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10010001 +// CHECK-INST: umlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x91,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0291 + +umlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16110 + +umlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00010000 +// CHECK-INST: umlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x10,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16110 + +umlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x91,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92191 + +umlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10010001 +// CHECK-INST: umlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b 
} +// CHECK-ENCODING: [0x91,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92191 + + +umlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x10,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700010 + +umlall za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754151 + +umlall za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x51,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754151 + +umlall za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b1 + +umlall za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb1,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b1 + +umlall za.d[w11, 4:7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c17f63f1 + +umlall za.d[w11, 4:7], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11110001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf1,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f1 + +umlall za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700231 + +umlall za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x31,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700231 + +umlall za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0031 + +umlall za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00110001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x31,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0031 + +umlall za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744270 + +umlall za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01110000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x70,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744270 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 
11000001-01110010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x90,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720190 + +umlall za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4031 + +umlall za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00110001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x31,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4031 + +umlall za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d1 + +umlall za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd1,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d1 + +umlall za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x30,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716130 + +umlall za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00110000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z9.h - z12.h }, 
z1.h +// CHECK-ENCODING: [0x30,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716130 + +umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2191 + +umlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x91,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2191 + + +umlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x10,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908010 + +umlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x15,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c515 + +umlall za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00010101 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x15,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c515 + +umlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0xe5,0x98,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c198e597 + +umlall za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x97,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e597 + +umlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x97,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe797 + +umlall za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10010111 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x97,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe797 + +umlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908615 + +umlall za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x15,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908615 + +umlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8411 + +umlall za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x11,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8411 + 
+umlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x10,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c610 + +umlall za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x10,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c610 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x90,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928190 + +umlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac011 + +umlall za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x11,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac011 + +umlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x95,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8295 + +umlall za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 
11000001-10011110-10000010-10010101 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x95,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8295 + +umlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e512 + +umlall za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00010010 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x12,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e512 + +umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba197 + +umlall za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10010111 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x97,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba197 + + +umlall za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10010 + +umlall za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10010 + +umlall za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 
11000001-11110101-01000001-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x11,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54111 + +umlall za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x11,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54111 + +umlall za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96191 + +umlall za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96191 + +umlall za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6391 + +umlall za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10010001 +// CHECK-INST: umlall za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6391 + +umlall za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x11,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c1f10211 + +umlall za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x11,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10211 + +umlall za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0011 + +umlall za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x11,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0011 + +umlall za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54210 + +umlall za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00010000 +// CHECK-INST: umlall za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x10,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54210 + +umlall za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10190 + +umlall za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10010000 +// CHECK-INST: umlall za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x90,0x01,0xe1,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10190 + +umlall za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94011 + +umlall za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00010001 +// CHECK-INST: umlall za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x11,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94011 + +umlall za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0291 + +umlall za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10010001 +// CHECK-INST: umlall za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x91,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0291 + +umlall za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16110 + +umlall za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00010000 +// CHECK-INST: umlall za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x10,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16110 + +umlall za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - 
z11.h } +// CHECK-ENCODING: [0x91,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92191 + +umlall za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10010001 +// CHECK-INST: umlall za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x91,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92191 + diff --git a/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s b/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s @@ -0,0 +1,74 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: umlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: umlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +umlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + 
+umlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: umlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +umlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: umlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w12, 6:7, vgx2], {z12.h-z13.h}, z2.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: umlsll za.d[w12, 6:7, vgx2], {z12.h-z13.h}, z2.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +umlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. 
+// CHECK-NEXT: umlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: umlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +umlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: umlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s-e b/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlsll-diagnostics.s-e @@ -0,0 +1,74 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +umlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.d[w11, 6:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: umlsll za.d[w11, 6:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: umlsll za.s[w10, 4:7], 
{z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +umlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: umlsll za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: umlsll za.d[w10, 4:7], z10.h, z30.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +umlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.s[w7, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w12, 6:7, vgx2], {z12.h-z13.h}, z2.h[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.d[w12, 6:7, vgx2], {z12.h-z13.h}, z2.h[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +umlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umlsll za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3. 
+// CHECK-NEXT: umlsll za.d[w8, 5:8, vgx2], {z22.h-z23.h}, z14.h[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +umlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .d +// CHECK-NEXT: umlsll za.h[w8, 6:7, vgx2], {z12.h-z13.h}, {z8.h-z9.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +umlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: umlsll za.s[w8, 0:3], {z0.b-z3.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umlsll.s b/llvm/test/MC/AArch64/SME2/umlsll.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlsll.s @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umlsll za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3], 
z0.b, z0.b +// CHECK-ENCODING: [0x18,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200418 + +umlsll za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x59,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254559 + +umlsll za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xbb,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865bb + +umlsll za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xfb,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67fb + +umlsll za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x39,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200639 + +umlsll za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x39,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0439 + +umlsll za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x78,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244678 + +umlsll za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x98,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220598 + +umlsll za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x39,0x44,0x2a,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4439 + +umlsll za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xd9,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06d9 + +umlsll za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00111010 +// CHECK-INST: umlsll za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x3a,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121653a + +umlsll za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10011011 +// CHECK-INST: umlsll za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x9b,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b259b + + +umlsll za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000018 + +umlsll za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x59,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055559 + +umlsll za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xbb,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edbb + +umlsll za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xfb,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ffffb + +umlsll za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x39,0x0e,0x00,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1000e39 + +umlsll za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x39,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8439 + +umlsll za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x78,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045678 + +umlsll za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x98,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021998 + +umlsll za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x39,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac839 + +umlsll za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xd9,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ad9 + +umlsll za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00111010 +// CHECK-INST: umlsll za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x3a,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f53a + +umlsll za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10011011 +// CHECK-INST: umlsll za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x9b,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba99b + + +umlsll za.d[w8, 0:3], z0.h, z0.h // 11000001-01100000-00000100-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x18,0x04,0x60,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1600418 + +umlsll za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x59,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654559 + +umlsll za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xbb,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865bb + +umlsll za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xfb,0x67,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f67fb + +umlsll za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x39,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600639 + +umlsll za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x39,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0439 + +umlsll za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x78,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644678 + +umlsll za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x98,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620598 + +umlsll za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x39,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4439 + +umlsll 
za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xd9,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06d9 + +umlsll za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00111010 +// CHECK-INST: umlsll za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x3a,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161653a + +umlsll za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10011011 +// CHECK-INST: umlsll za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x9b,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b259b + + +umlsll za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800018 + +umlsll za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x59,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854559 + +umlsll za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xbb,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edbb + +umlsll za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xfb,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18feffb + +umlsll za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z17.h, z0.h[3] +// CHECK-ENCODING: [0x39,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e39 + +umlsll za.d[w8, 4:7], z1.h, 
z14.h[5] // 11000001-10001110-10000100-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x39,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8439 + +umlsll za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x78,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844678 + +umlsll za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x98,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820998 + +umlsll za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x39,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac839 + +umlsll za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xd9,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ad9 + +umlsll za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00111010 +// CHECK-INST: umlsll za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x3a,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e53a + +umlsll za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10011011 +// CHECK-INST: umlsll za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x9b,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba99b + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200018 + +umlsll za.s[w8, 
0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254159 + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254159 + +umlsll za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b9 + +umlsll za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b9 + +umlsll za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f9 + +umlsll za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f9 + +umlsll za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00111001 +// CHECK-INST: 
umlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200239 + +umlsll za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200239 + +umlsll za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0039 + +umlsll za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0039 + +umlsll za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244278 + +umlsll za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244278 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220198 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x22,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220198 + +umlsll za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4039 + +umlsll za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4039 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d9 + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d9 + +umlsll za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216138 + +umlsll za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216138 + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2199 + +umlsll 
za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2199 + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100018 + +umlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x5d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115455d + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x5d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115455d + +umlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d9f + +umlsll za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d9f + +umlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 
11000001, 00011111, 01101111, 11011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xdf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fdf + +umlsll za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xdf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fdf + +umlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001, 00010000, 00001110, 00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e1d + +umlsll za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e1d + +umlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0419 + +umlsll za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0419 + +umlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x58,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144658 + +umlsll za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01011000 +// 
CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x58,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144658 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120998 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120998 + +umlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4819 + +umlsll za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4819 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xdd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0add + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xdd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0add + +umlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], 
{ z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111651a + +umlsll za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111651a + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001, 00011011, 00101001, 10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b299f + +umlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b299f + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00018 + +umlsll za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x59,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44159 + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { 
z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x59,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44159 + +umlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x99,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86199 + +umlsll za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x99,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86199 + +umlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d9 + +umlsll za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d9 + +umlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x19,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00219 + +umlsll za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x19,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00219 + +umlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 
00000000, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0019 + +umlsll za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0019 + +umlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x58,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44258 + +umlsll za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x58,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44258 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20198 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20198 + +umlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x19,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4019 + +umlsll za.s[w10, 4:7], {z0.b 
- z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x19,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4019 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d9 + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d9 + +umlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06118 + +umlsll za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06118 + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x99,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2199 + +umlsll za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x99,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +//
CHECK-UNKNOWN: c1aa2199 + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654159 + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654159 + +umlsll za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b9 + +umlsll za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b9 + +umlsll za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f9 + +umlsll za.d[w11, 4:7], {z31.h - z0.h}, z15.h //
11000001-01101111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f9 + +umlsll za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600239 + +umlsll za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600239 + +umlsll za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0039 + +umlsll za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0039 + +umlsll za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644278 + +umlsll za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644278 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h,
z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620198 + +umlsll za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010, 01000000, 00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4039 + +umlsll za.d[w10, 4:7], {z1.h - z2.h}, z10.h // 11000001-01101010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4039 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d9 + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d9 + +umlsll za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616138 + +umlsll za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 11000001-01100001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires:
sme2 +// CHECK-UNKNOWN: c1616138 + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2199 + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2199 + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x5d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195455d + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x5d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195455d + +umlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198659f + +umlsll za.d[w11,
4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198659f + +umlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xdf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67df + +umlsll za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xdf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67df + +umlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190061d + +umlsll za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190061d + +umlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0419 + +umlsll za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0419 + +umlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001,
10010100, 01000110, 01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x58,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944658 + +umlsll za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x58,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944658 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920198 + +umlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4019 + +umlsll za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4019 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02dd + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11011101 +// CHECK-INST:
umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02dd + +umlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191651a + +umlsll za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191651a + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b219f + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b219f + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01011001 +// CHECK-INST: umlsll za.d[w10, 4:7,
vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x59,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44159 + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x59,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44159 + +umlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x99,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86199 + +umlsll za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x99,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86199 + +umlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d9 + +umlsll za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d9 + +umlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x19,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00219 + +umlsll za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} //
11000001-11110000-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x19,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00219 + +umlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0019 + +umlsll za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0019 + +umlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44258 + +umlsll za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44258 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20198 + +umlsll
za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4019 + +umlsll za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4019 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d9 + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d9 + +umlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06118 + +umlsll za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06118 + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010, 00100001, 10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x99,0x21,0xea,0xc1] +// CHECK-ERROR: instruction
requires: sme2 +// CHECK-UNKNOWN: c1ea2199 + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x99,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2199 + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300018 + +umlsll za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354159 + +umlsll za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354159 + +umlsll za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b9 + +umlsll za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b9 + +umlsll za.s[w11, 4:7, vgx4],
{z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f9 + +umlsll za.s[w11, 4:7], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f9 + +umlsll za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300239 + +umlsll za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300239 + +umlsll za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0039 + +umlsll za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0039 + +umlsll za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344278 + +umlsll za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01111000 +// CHECK-INST: 
umlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344278 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320198 + +umlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320198 + +umlsll za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4039 + +umlsll za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4039 + +umlsll za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d9 + +umlsll za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d9 + +umlsll za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x31,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316138 + +umlsll za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316138 + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2199 + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2199 + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108018 + +umlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x1d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c51d + +umlsll za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x1d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c51d + +umlsll 
za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed9f + +umlsll za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed9f + +umlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x9f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef9f + +umlsll za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x9f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef9f + +umlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e1d + +umlsll za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e1d + +umlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8419 + +umlsll za.s[w8, 4:7], {z0.b - z3.b}, 
z14.b[4] // 11000001-00011110-10000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8419 + +umlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x18,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c618 + +umlsll za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x18,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c618 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128998 + +umlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128998 + +umlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac819 + +umlsll za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac819 + +umlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10011101 +// 
CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x9d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a9d + +umlsll za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x9d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a9d + +umlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e51a + +umlsll za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e51a + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba99f + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba99f + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, 
vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10018 + +umlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x19,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54119 + +umlsll za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x19,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54119 + +umlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96199 + +umlsll za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96199 + +umlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6399 + +umlsll za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6399 + +umlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 
11000001-10110001-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x19,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10219 + +umlsll za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x19,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10219 + +umlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0019 + +umlsll za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0019 + +umlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x18,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54218 + +umlsll za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x18,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54218 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10198 + 
+umlsll za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10198 + +umlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x19,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94019 + +umlsll za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x19,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94019 + +umlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0299 + +umlsll za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0299 + +umlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16118 + +umlsll za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x61,0xa1,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1a16118 + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92199 + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92199 + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700018 + +umlsll za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754159 + +umlsll za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754159 + +umlsll za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b9 
+ +umlsll za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b9 + +umlsll za.d[w11, 4:7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f9 + +umlsll za.d[w11, 4:7], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f9 + +umlsll za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700239 + +umlsll za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700239 + +umlsll za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0039 + +umlsll za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0039 + +umlsll za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 
11000001-01110100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744278 + +umlsll za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744278 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720198 + +umlsll za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4039 + +umlsll za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4039 + +umlsll za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d9 + +umlsll za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, 
z14.h +// CHECK-ENCODING: [0xd9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d9 + +umlsll za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716138 + +umlsll za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716138 + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2199 + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2199 + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908018 + +umlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x1d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c195c51d + +umlsll za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x1d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c51d + +umlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e59f + +umlsll za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e59f + +umlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x9f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe79f + +umlsll za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x9f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe79f + +umlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190861d + +umlsll za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190861d + 
+umlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8419 + +umlsll za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8419 + +umlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x18,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c618 + +umlsll za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x18,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c618 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928198 + +umlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac019 + +umlsll za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 
11000001-10011010-11000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac019 + +umlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e829d + +umlsll za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e829d + +umlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e51a + +umlsll za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e51a + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba19f + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba19f + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00011000 +// 
CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10018 + +umlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x19,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54119 + +umlsll za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x19,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54119 + +umlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96199 + +umlsll za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96199 + +umlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6399 + +umlsll za.d[w11, 4:7], {z28.h - z31.h}, 
{z28.h - z31.h} // 11000001-11111101-01100011-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6399 + +umlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x19,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10219 + +umlsll za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x19,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10219 + +umlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0019 + +umlsll za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0019 + +umlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54218 + +umlsll za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1f54218 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10198 + +umlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94019 + +umlsll za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94019 + +umlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0299 + +umlsll za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0299 + +umlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: 
[0x18,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16118 + +umlsll za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16118 + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92199 + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92199 + diff --git a/llvm/test/MC/AArch64/SME2/umlsll.s-e b/llvm/test/MC/AArch64/SME2/umlsll.s-e new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umlsll.s-e @@ -0,0 +1,2045 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST + + +umlsll za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x18,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200418 + +umlsll za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x59,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254559 + +umlsll za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xbb,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865bb + +umlsll za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xfb,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67fb + +umlsll za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x39,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200639 + +umlsll za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x39,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0439 + +umlsll za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x78,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244678 + +umlsll za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x98,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220598 + +umlsll 
za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x39,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4439 + +umlsll za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xd9,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06d9 + +umlsll za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00111010 +// CHECK-INST: umlsll za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x3a,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121653a + +umlsll za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10011011 +// CHECK-INST: umlsll za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x9b,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b259b + + +umlsll za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000018 + +umlsll za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x59,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055559 + +umlsll za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xbb,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108edbb + +umlsll za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11111011 +// CHECK-INST: umlsll za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xfb,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ffffb + +umlsll za.s[w8, 4:7], z17.b, 
z0.b[3] // 11000001-00000000-00001110-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x39,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e39 + +umlsll za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x39,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8439 + +umlsll za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x78,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045678 + +umlsll za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x98,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021998 + +umlsll za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x39,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac839 + +umlsll za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xd9,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ad9 + +umlsll za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00111010 +// CHECK-INST: umlsll za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x3a,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f53a + +umlsll za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10011011 +// CHECK-INST: umlsll za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x9b,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ba99b + + +umlsll za.d[w8, 0:3], z0.h, z0.h // 
11000001-01100000-00000100-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z0.h, z0.h +// CHECK-ENCODING: [0x18,0x04,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600418 + +umlsll za.d[w10, 4:7], z10.h, z5.h // 11000001-01100101-01000101-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7], z10.h, z5.h +// CHECK-ENCODING: [0x59,0x45,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654559 + +umlsll za.d[w11, 12:15], z13.h, z8.h // 11000001-01101000-01100101-10111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z13.h, z8.h +// CHECK-ENCODING: [0xbb,0x65,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16865bb + +umlsll za.d[w11, 12:15], z31.h, z15.h // 11000001-01101111-01100111-11111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z31.h, z15.h +// CHECK-ENCODING: [0xfb,0x67,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f67fb + +umlsll za.d[w8, 4:7], z17.h, z0.h // 11000001-01100000-00000110-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z17.h, z0.h +// CHECK-ENCODING: [0x39,0x06,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600639 + +umlsll za.d[w8, 4:7], z1.h, z14.h // 11000001-01101110-00000100-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z1.h, z14.h +// CHECK-ENCODING: [0x39,0x04,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0439 + +umlsll za.d[w10, 0:3], z19.h, z4.h // 11000001-01100100-01000110-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3], z19.h, z4.h +// CHECK-ENCODING: [0x78,0x46,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644678 + +umlsll za.d[w8, 0:3], z12.h, z2.h // 11000001-01100010-00000101-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z12.h, z2.h +// CHECK-ENCODING: [0x98,0x05,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620598 + +umlsll za.d[w10, 4:7], z1.h, z10.h // 11000001-01101010-01000100-00111001 +// CHECK-INST: umlsll 
za.d[w10, 4:7], z1.h, z10.h +// CHECK-ENCODING: [0x39,0x44,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4439 + +umlsll za.d[w8, 4:7], z22.h, z14.h // 11000001-01101110-00000110-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7], z22.h, z14.h +// CHECK-ENCODING: [0xd9,0x06,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e06d9 + +umlsll za.d[w11, 8:11], z9.h, z1.h // 11000001-01100001-01100101-00111010 +// CHECK-INST: umlsll za.d[w11, 8:11], z9.h, z1.h +// CHECK-ENCODING: [0x3a,0x65,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c161653a + +umlsll za.d[w9, 12:15], z12.h, z11.h // 11000001-01101011-00100101-10011011 +// CHECK-INST: umlsll za.d[w9, 12:15], z12.h, z11.h +// CHECK-ENCODING: [0x9b,0x25,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b259b + + +umlsll za.d[w8, 0:3], z0.h, z0.h[0] // 11000001-10000000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z0.h, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800018 + +umlsll za.d[w10, 4:7], z10.h, z5.h[1] // 11000001-10000101-01000101-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7], z10.h, z5.h[1] +// CHECK-ENCODING: [0x59,0x45,0x85,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1854559 + +umlsll za.d[w11, 12:15], z13.h, z8.h[7] // 11000001-10001000-11101101-10111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z13.h, z8.h[7] +// CHECK-ENCODING: [0xbb,0xed,0x88,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c188edbb + +umlsll za.d[w11, 12:15], z31.h, z15.h[7] // 11000001-10001111-11101111-11111011 +// CHECK-INST: umlsll za.d[w11, 12:15], z31.h, z15.h[7] +// CHECK-ENCODING: [0xfb,0xef,0x8f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18feffb + +umlsll za.d[w8, 4:7], z17.h, z0.h[3] // 11000001-10000000-00001110-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z17.h, 
z0.h[3] +// CHECK-ENCODING: [0x39,0x0e,0x80,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1800e39 + +umlsll za.d[w8, 4:7], z1.h, z14.h[5] // 11000001-10001110-10000100-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7], z1.h, z14.h[5] +// CHECK-ENCODING: [0x39,0x84,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e8439 + +umlsll za.d[w10, 0:3], z19.h, z4.h[1] // 11000001-10000100-01000110-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3], z19.h, z4.h[1] +// CHECK-ENCODING: [0x78,0x46,0x84,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1844678 + +umlsll za.d[w8, 0:3], z12.h, z2.h[2] // 11000001-10000010-00001001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3], z12.h, z2.h[2] +// CHECK-ENCODING: [0x98,0x09,0x82,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1820998 + +umlsll za.d[w10, 4:7], z1.h, z10.h[6] // 11000001-10001010-11001000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7], z1.h, z10.h[6] +// CHECK-ENCODING: [0x39,0xc8,0x8a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ac839 + +umlsll za.d[w8, 4:7], z22.h, z14.h[2] // 11000001-10001110-00001010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7], z22.h, z14.h[2] +// CHECK-ENCODING: [0xd9,0x0a,0x8e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18e0ad9 + +umlsll za.d[w11, 8:11], z9.h, z1.h[5] // 11000001-10000001-11100101-00111010 +// CHECK-INST: umlsll za.d[w11, 8:11], z9.h, z1.h[5] +// CHECK-ENCODING: [0x3a,0xe5,0x81,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c181e53a + +umlsll za.d[w9, 12:15], z12.h, z11.h[6] // 11000001-10001011-10101001-10011011 +// CHECK-INST: umlsll za.d[w9, 12:15], z12.h, z11.h[6] +// CHECK-ENCODING: [0x9b,0xa9,0x8b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c18ba99b + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001, 00100000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { 
z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200018 + +umlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001, 00100101, 01000001, 01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254159 + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254159 + +umlsll za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001, 00101000, 01100001, 10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b9 + +umlsll za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861b9 + +umlsll za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001, 00101111, 01100011, 11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63f9 + +umlsll za.s[w11, 4:7], {z31.b - z0.b}, z15.b // 11000001-00101111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c12f63f9 + +umlsll za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001, 00100000, 00000010, 00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200239 + +umlsll za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200239 + +umlsll za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001, 00101110, 00000000, 00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0039 + +umlsll za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0039 + +umlsll za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001, 00100100, 01000010, 01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244278 + +umlsll za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244278 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001, 00100010, 00000001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220198 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b 
// 11000001-00100010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220198 + +umlsll za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001, 00101010, 01000000, 00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4039 + +umlsll za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4039 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001, 00101110, 00000010, 11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d9 + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02d9 + +umlsll za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001, 00100001, 01100001, 00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216138 + +umlsll za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216138 + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001, 00101011, 00100001, 10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { 
z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2199 + +umlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2199 + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001, 00010000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100018 + +umlsll za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001, 00010101, 01000101, 01011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x5d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115455d + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x5d,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115455d + +umlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001, 00011000, 01101101, 10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d9f + +umlsll za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: 
[0x9f,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186d9f + +umlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001, 00011111, 01101111, 11011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xdf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fdf + +umlsll za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xdf,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fdf + +umlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001, 00010000, 00001110, 00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e1d + +umlsll za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e1d + +umlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001, 00011110, 00000100, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0419 + +umlsll za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0419 + +umlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001, 00010100, 01000110, 01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x58,0x46,0x14,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144658 + +umlsll za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x58,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144658 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001, 00010010, 00001001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120998 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1120998 + +umlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001, 00011010, 01001000, 00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4819 + +umlsll za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4819 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001, 00011110, 00001010, 11011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xdd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0add + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xdd,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c11e0add + +umlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001, 00010001, 01100101, 00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111651a + +umlsll za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111651a + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001, 00011011, 00101001, 10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b299f + +umlsll za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b299f + + +umlsll za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001, 10100000, 00000000, 00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00018 + +umlsll za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00018 + +umlsll za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001, 10110100, 01000001, 01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x59,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1b44159 + +umlsll za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x59,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44159 + +umlsll za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001, 10101000, 01100001, 10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x99,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86199 + +umlsll za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x99,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86199 + +umlsll za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001, 10111110, 01100011, 11011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d9 + +umlsll za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63d9 + +umlsll za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001, 10110000, 00000010, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x19,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00219 + +umlsll za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: 
[0x19,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00219 + +umlsll za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0019 + +umlsll za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0019 + +umlsll za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x58,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44258 + +umlsll za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x58,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44258 + +umlsll za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20198 + +umlsll za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20198 + +umlsll za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { 
z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x19,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4019 + +umlsll za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x19,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4019 + +umlsll za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d9 + +umlsll za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xd9,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02d9 + +umlsll za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06118 + +umlsll za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x18,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06118 + +umlsll za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x99,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2199 + +umlsll za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10011001 +// 
CHECK-INST: umlsll za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x99,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2199 + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h // 11000001, 01100000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h // 11000001-01100000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h // 11000001, 01100101, 01000001, 01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654159 + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h // 11000001-01100101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1654159 + +umlsll za.d[w11, 4:7, vgx2], {z13.h, z14.h}, z8.h // 11000001, 01101000, 01100001, 10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b9 + +umlsll za.d[w11, 4:7], {z13.h - z14.h}, z8.h // 11000001-01101000-01100001-10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z13.h, z14.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16861b9 + +umlsll za.d[w11, 4:7, vgx2], {z31.h, z0.h}, z15.h // 11000001, 01101111, 01100011, 11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// 
CHECK-ENCODING: [0xf9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f9 + +umlsll za.d[w11, 4:7], {z31.h - z0.h}, z15.h // 11000001-01101111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z31.h, z0.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16f63f9 + +umlsll za.d[w8, 4:7, vgx2], {z17.h, z18.h}, z0.h // 11000001, 01100000, 00000010, 00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600239 + +umlsll za.d[w8, 4:7], {z17.h - z18.h}, z0.h // 11000001-01100000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z17.h, z18.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1600239 + +umlsll za.d[w8, 4:7, vgx2], {z1.h, z2.h}, z14.h // 11000001, 01101110, 00000000, 00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0039 + +umlsll za.d[w8, 4:7], {z1.h - z2.h}, z14.h // 11000001-01101110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e0039 + +umlsll za.d[w10, 0:3, vgx2], {z19.h, z20.h}, z4.h // 11000001, 01100100, 01000010, 01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1644278 + +umlsll za.d[w10, 0:3], {z19.h - z20.h}, z4.h // 11000001-01100100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z19.h, z20.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x64,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1644278 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h // 11000001, 01100010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h // 11000001-01100010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x62,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1620198 + +umlsll za.d[w10, 4:7, vgx2], {z1.h, z2.h}, z10.h // 11000001, 01101010, 01000000, 00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4039 + +umlsll za.d[w10, 4:7], {z1.h - z2.h}, z10.h // 11000001-01101010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z1.h, z2.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x6a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16a4039 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h // 11000001, 01101110, 00000010, 11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d9 + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h // 11000001-01101110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x6e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16e02d9 + +umlsll za.d[w11, 0:3, vgx2], {z9.h, z10.h}, z1.h // 11000001, 01100001, 01100001, 00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616138 + +umlsll za.d[w11, 0:3], {z9.h - z10.h}, z1.h // 
11000001-01100001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z9.h, z10.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x61,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1616138 + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h // 11000001, 01101011, 00100001, 10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2199 + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h // 11000001-01101011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x6b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16b2199 + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, z0.h[0] // 11000001, 10010000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, z0.h[0] // 11000001-10010000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x00,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1900018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, z5.h[6] // 11000001, 10010101, 01000101, 01011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x5d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195455d + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, z5.h[6] // 11000001-10010101-01000101-01011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, z5.h[6] +// CHECK-ENCODING: [0x5d,0x45,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195455d + +umlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, z8.h[7] // 11000001, 10011000, 01100101, 10011111 +// CHECK-INST: umlsll
za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198659f + +umlsll za.d[w11, 4:7], {z12.h - z13.h}, z8.h[7] // 11000001-10011000-01100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0x65,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198659f + +umlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, z15.h[7] // 11000001, 10011111, 01100111, 11011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xdf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67df + +umlsll za.d[w11, 4:7], {z30.h - z31.h}, z15.h[7] // 11000001-10011111-01100111-11011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, z15.h[7] +// CHECK-ENCODING: [0xdf,0x67,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19f67df + +umlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, z0.h[6] // 11000001, 10010000, 00000110, 00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190061d + +umlsll za.d[w8, 4:7], {z16.h - z17.h}, z0.h[6] // 11000001-10010000-00000110-00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x06,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190061d + +umlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, z14.h[4] // 11000001, 10011110, 00000100, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0419 + +umlsll za.d[w8, 4:7], {z0.h - z1.h}, z14.h[4] // 11000001-10011110-00000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, z14.h[4] +//
CHECK-ENCODING: [0x19,0x04,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e0419 + +umlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, z4.h[4] // 11000001, 10010100, 01000110, 01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x58,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944658 + +umlsll za.d[w10, 0:3], {z18.h - z19.h}, z4.h[4] // 11000001-10010100-01000110-01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, z4.h[4] +// CHECK-ENCODING: [0x58,0x46,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1944658 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, z2.h[0] // 11000001, 10010010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, z2.h[0] // 11000001-10010010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x01,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1920198 + +umlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, z10.h[0] // 11000001, 10011010, 01000000, 00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4019 + +umlsll za.d[w10, 4:7], {z0.h - z1.h}, z10.h[0] // 11000001-10011010-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0x40,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19a4019 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001, 10011110, 00000010, 11011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x02,0x9e,0xc1] +//
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02dd + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, z14.h[2] // 11000001-10011110-00000010-11011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xdd,0x02,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e02dd + +umlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, z1.h[5] // 11000001, 10010001, 01100101, 00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191651a + +umlsll za.d[w11, 0:3], {z8.h - z9.h}, z1.h[5] // 11000001-10010001-01100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0x65,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191651a + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, z11.h[3] // 11000001, 10011011, 00100001, 10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b219f + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, z11.h[3] // 11000001-10011011-00100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0x21,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19b219f + + +umlsll za.d[w8, 0:3, vgx2], {z0.h, z1.h}, {z0.h, z1.h} // 11000001, 11100000, 00000000, 00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e00018 + +umlsll za.d[w8, 0:3], {z0.h - z1.h}, {z0.h - z1.h} // 11000001-11100000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x00,0xe0,0xc1] +// CHECK-ERROR: instruction requires:
sme2 +// CHECK-UNKNOWN: c1e00018 + +umlsll za.d[w10, 4:7, vgx2], {z10.h, z11.h}, {z20.h, z21.h} // 11000001, 11110100, 01000001, 01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x59,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44159 + +umlsll za.d[w10, 4:7], {z10.h - z11.h}, {z20.h - z21.h} // 11000001-11110100-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z10.h, z11.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x59,0x41,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44159 + +umlsll za.d[w11, 4:7, vgx2], {z12.h, z13.h}, {z8.h, z9.h} // 11000001, 11101000, 01100001, 10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x99,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86199 + +umlsll za.d[w11, 4:7], {z12.h - z13.h}, {z8.h - z9.h} // 11000001-11101000-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z12.h, z13.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x99,0x61,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e86199 + +umlsll za.d[w11, 4:7, vgx2], {z30.h, z31.h}, {z30.h, z31.h} // 11000001, 11111110, 01100011, 11011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d9 + +umlsll za.d[w11, 4:7], {z30.h - z31.h}, {z30.h - z31.h} // 11000001-11111110-01100011-11011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx2], { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x63,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe63d9 + +umlsll za.d[w8, 4:7, vgx2], {z16.h, z17.h}, {z16.h, z17.h} // 11000001, 11110000, 00000010, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +//
CHECK-ENCODING: [0x19,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00219 + +umlsll za.d[w8, 4:7], {z16.h - z17.h}, {z16.h - z17.h} // 11000001-11110000-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z16.h, z17.h }, { z16.h, z17.h } +// CHECK-ENCODING: [0x19,0x02,0xf0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f00219 + +umlsll za.d[w8, 4:7, vgx2], {z0.h, z1.h}, {z30.h, z31.h} // 11000001, 11111110, 00000000, 00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0019 + +umlsll za.d[w8, 4:7], {z0.h - z1.h}, {z30.h - z31.h} // 11000001-11111110-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z0.h, z1.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe0019 + +umlsll za.d[w10, 0:3, vgx2], {z18.h, z19.h}, {z20.h, z21.h} // 11000001, 11110100, 01000010, 01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44258 + +umlsll za.d[w10, 0:3], {z18.h - z19.h}, {z20.h - z21.h} // 11000001-11110100-01000010-01011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx2], { z18.h, z19.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x58,0x42,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f44258 + +umlsll za.d[w8, 0:3, vgx2], {z12.h, z13.h}, {z2.h, z3.h} // 11000001, 11100010, 00000001, 10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20198 + +umlsll za.d[w8, 0:3], {z12.h - z13.h}, {z2.h - z3.h} // 11000001-11100010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8,
0:3, vgx2], { z12.h, z13.h }, { z2.h, z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e20198 + +umlsll za.d[w10, 4:7, vgx2], {z0.h, z1.h}, {z26.h, z27.h} // 11000001, 11111010, 01000000, 00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4019 + +umlsll za.d[w10, 4:7], {z0.h - z1.h}, {z26.h - z27.h} // 11000001-11111010-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx2], { z0.h, z1.h }, { z26.h, z27.h } +// CHECK-ENCODING: [0x19,0x40,0xfa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fa4019 + +umlsll za.d[w8, 4:7, vgx2], {z22.h, z23.h}, {z30.h, z31.h} // 11000001, 11111110, 00000010, 11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d9 + +umlsll za.d[w8, 4:7], {z22.h - z23.h}, {z30.h - z31.h} // 11000001-11111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx2], { z22.h, z23.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0xd9,0x02,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fe02d9 + +umlsll za.d[w11, 0:3, vgx2], {z8.h, z9.h}, {z0.h, z1.h} // 11000001, 11100000, 01100001, 00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06118 + +umlsll za.d[w11, 0:3], {z8.h - z9.h}, {z0.h - z1.h} // 11000001-11100000-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx2], { z8.h, z9.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x18,0x61,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e06118 + +umlsll za.d[w9, 4:7, vgx2], {z12.h, z13.h}, {z10.h, z11.h} // 11000001, 11101010,
00100001, 10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x99,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2199 + +umlsll za.d[w9, 4:7], {z12.h - z13.h}, {z10.h - z11.h} // 11000001-11101010-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx2], { z12.h, z13.h }, { z10.h, z11.h } +// CHECK-ENCODING: [0x99,0x21,0xea,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ea2199 + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x18,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300018 + +umlsll za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354159 + +umlsll za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x59,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354159 + +umlsll za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b9 + +umlsll za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], {
z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xb9,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861b9 + +umlsll za.s[w11, 4:7, vgx4], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f9 + +umlsll za.s[w11, 4:7], {z31.b - z2.b}, z15.b // 11000001-00111111-01100011-11111001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xf9,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63f9 + +umlsll za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300239 + +umlsll za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x39,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300239 + +umlsll za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0039 + +umlsll za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00111001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x39,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0039 + +umlsll za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x34,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344278 + +umlsll za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01111000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x78,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344278 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320198 + +umlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x98,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320198 + +umlsll za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4039 + +umlsll za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00111001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x39,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4039 + +umlsll za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d9 + +umlsll za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xd9,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02d9 + +umlsll za.s[w11, 0:3, vgx4], 
{z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316138 + +umlsll za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00111000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x38,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316138 + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2199 + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x99,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2199 + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x18,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108018 + +umlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00011101 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x1d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c51d + +umlsll za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00011101 +// CHECK-INST: umlsll 
za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x1d,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c51d + +umlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed9f + +umlsll za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0x9f,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118ed9f + +umlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x9f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef9f + +umlsll za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10011111 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0x9f,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fef9f + +umlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e1d + +umlsll za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x1d,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e1d + +umlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b 
}, z14.b[4] +// CHECK-ENCODING: [0x19,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8419 + +umlsll za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x19,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8419 + +umlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x18,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c618 + +umlsll za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x18,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c618 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128998 + +umlsll za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0x98,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1128998 + +umlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac819 + +umlsll za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x19,0xc8,0x1a,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac819 + +umlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x9d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a9d + +umlsll za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10011101 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0x9d,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8a9d + +umlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e51a + +umlsll za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00011010 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x1a,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e51a + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba99f + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10011111 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0x9f,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba99f + + +umlsll za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c1a10018 + +umlsll za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10018 + +umlsll za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x19,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54119 + +umlsll za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x19,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54119 + +umlsll za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96199 + +umlsll za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96199 + +umlsll za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6399 + +umlsll za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10011001 +// CHECK-INST: umlsll za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// 
CHECK-ENCODING: [0x99,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6399 + +umlsll za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x19,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10219 + +umlsll za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x19,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10219 + +umlsll za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0019 + +umlsll za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x19,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0019 + +umlsll za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x18,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54218 + +umlsll za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00011000 +// CHECK-INST: umlsll za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x18,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54218 + +umlsll za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10011000 +// CHECK-INST: umlsll 
za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10198 + +umlsll za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10011000 +// CHECK-INST: umlsll za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x98,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10198 + +umlsll za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x19,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94019 + +umlsll za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00011001 +// CHECK-INST: umlsll za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x19,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94019 + +umlsll za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0299 + +umlsll za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10011001 +// CHECK-INST: umlsll za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x99,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0299 + +umlsll za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16118 + +umlsll za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 
11000001-10100001-01100001-00011000 +// CHECK-INST: umlsll za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x18,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16118 + +umlsll za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92199 + +umlsll za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10011001 +// CHECK-INST: umlsll za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x99,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92199 + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h // 11000001-01110000-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x18,0x00,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700018 + +umlsll za.d[w10, 4:7, vgx4], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754159 + +umlsll za.d[w10, 4:7], {z10.h - z13.h}, z5.h // 11000001-01110101-01000001-01011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z10.h - z13.h }, z5.h +// CHECK-ENCODING: [0x59,0x41,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1754159 + +umlsll za.d[w11, 4:7, vgx4], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10111001 +// 
CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b9 + +umlsll za.d[w11, 4:7], {z13.h - z16.h}, z8.h // 11000001-01111000-01100001-10111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z13.h - z16.h }, z8.h +// CHECK-ENCODING: [0xb9,0x61,0x78,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17861b9 + +umlsll za.d[w11, 4:7, vgx4], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f9 + +umlsll za.d[w11, 4:7], {z31.h - z2.h}, z15.h // 11000001-01111111-01100011-11111001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z31.h, z0.h, z1.h, z2.h }, z15.h +// CHECK-ENCODING: [0xf9,0x63,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17f63f9 + +umlsll za.d[w8, 4:7, vgx4], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700239 + +umlsll za.d[w8, 4:7], {z17.h - z20.h}, z0.h // 11000001-01110000-00000010-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z17.h - z20.h }, z0.h +// CHECK-ENCODING: [0x39,0x02,0x70,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1700239 + +umlsll za.d[w8, 4:7, vgx4], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// CHECK-ENCODING: [0x39,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0039 + +umlsll za.d[w8, 4:7], {z1.h - z4.h}, z14.h // 11000001-01111110-00000000-00111001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z1.h - z4.h }, z14.h +// 
CHECK-ENCODING: [0x39,0x00,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e0039 + +umlsll za.d[w10, 0:3, vgx4], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744278 + +umlsll za.d[w10, 0:3], {z19.h - z22.h}, z4.h // 11000001-01110100-01000010-01111000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z19.h - z22.h }, z4.h +// CHECK-ENCODING: [0x78,0x42,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1744278 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h // 11000001-01110010-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h +// CHECK-ENCODING: [0x98,0x01,0x72,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1720198 + +umlsll za.d[w10, 4:7, vgx4], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4039 + +umlsll za.d[w10, 4:7], {z1.h - z4.h}, z10.h // 11000001-01111010-01000000-00111001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z1.h - z4.h }, z10.h +// CHECK-ENCODING: [0x39,0x40,0x7a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17a4039 + +umlsll za.d[w8, 4:7, vgx4], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c17e02d9 + +umlsll za.d[w8, 4:7], {z22.h - z25.h}, z14.h // 11000001-01111110-00000010-11011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z22.h - z25.h }, z14.h +// CHECK-ENCODING: [0xd9,0x02,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17e02d9 + +umlsll za.d[w11, 0:3, vgx4], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716138 + +umlsll za.d[w11, 0:3], {z9.h - z12.h}, z1.h // 11000001-01110001-01100001-00111000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z9.h - z12.h }, z1.h +// CHECK-ENCODING: [0x38,0x61,0x71,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1716138 + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2199 + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h // 11000001-01111011-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h +// CHECK-ENCODING: [0x99,0x21,0x7b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17b2199 + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, z0.h[0] // 11000001-10010000-10000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x80,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1908018 + +umlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, z5.h[6] // 
11000001-10010101-11000101-00011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x1d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c51d + +umlsll za.d[w10, 4:7], {z8.h - z11.h}, z5.h[6] // 11000001-10010101-11000101-00011101 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, z5.h[6] +// CHECK-ENCODING: [0x1d,0xc5,0x95,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c195c51d + +umlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e59f + +umlsll za.d[w11, 4:7], {z12.h - z15.h}, z8.h[7] // 11000001-10011000-11100101-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, z8.h[7] +// CHECK-ENCODING: [0x9f,0xe5,0x98,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c198e59f + +umlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x9f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe79f + +umlsll za.d[w11, 4:7], {z28.h - z31.h}, z15.h[7] // 11000001-10011111-11100111-10011111 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, z15.h[7] +// CHECK-ENCODING: [0x9f,0xe7,0x9f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19fe79f + +umlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190861d + +umlsll za.d[w8, 4:7], {z16.h - z19.h}, z0.h[6] // 11000001-10010000-10000110-00011101 +// CHECK-INST: 
umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, z0.h[6] +// CHECK-ENCODING: [0x1d,0x86,0x90,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c190861d + +umlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8419 + +umlsll za.d[w8, 4:7], {z0.h - z3.h}, z14.h[4] // 11000001-10011110-10000100-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, z14.h[4] +// CHECK-ENCODING: [0x19,0x84,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e8419 + +umlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x18,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c618 + +umlsll za.d[w10, 0:3], {z16.h - z19.h}, z4.h[4] // 11000001-10010100-11000110-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, z4.h[4] +// CHECK-ENCODING: [0x18,0xc6,0x94,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c194c618 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, z2.h[0] // 11000001-10010010-10000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x81,0x92,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1928198 + +umlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] 
+// CHECK-ENCODING: [0x19,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac019 + +umlsll za.d[w10, 4:7], {z0.h - z3.h}, z10.h[0] // 11000001-10011010-11000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc0,0x9a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ac019 + +umlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e829d + +umlsll za.d[w8, 4:7], {z20.h - z23.h}, z14.h[2] // 11000001-10011110-10000010-10011101 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, z14.h[2] +// CHECK-ENCODING: [0x9d,0x82,0x9e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19e829d + +umlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e51a + +umlsll za.d[w11, 0:3], {z8.h - z11.h}, z1.h[5] // 11000001-10010001-11100101-00011010 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, z1.h[5] +// CHECK-ENCODING: [0x1a,0xe5,0x91,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c191e51a + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0xa1,0x9b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba19f + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, z11.h[3] // 11000001-10011011-10100001-10011111 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, z11.h[3] +// CHECK-ENCODING: [0x9f,0xa1,0x9b,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c19ba19f + + +umlsll za.d[w8, 0:3, vgx4], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10018 + +umlsll za.d[w8, 0:3], {z0.h - z3.h}, {z0.h - z3.h} // 11000001-11100001-00000000-00011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x00,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10018 + +umlsll za.d[w10, 4:7, vgx4], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x19,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54119 + +umlsll za.d[w10, 4:7], {z8.h - z11.h}, {z20.h - z23.h} // 11000001-11110101-01000001-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z8.h - z11.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x19,0x41,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54119 + +umlsll za.d[w11, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96199 + +umlsll za.d[w11, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-01100001-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x61,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e96199 + +umlsll za.d[w11, 4:7, vgx4], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h 
- z31.h } +// CHECK-ENCODING: [0x99,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6399 + +umlsll za.d[w11, 4:7], {z28.h - z31.h}, {z28.h - z31.h} // 11000001-11111101-01100011-10011001 +// CHECK-INST: umlsll za.d[w11, 4:7, vgx4], { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x63,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd6399 + +umlsll za.d[w8, 4:7, vgx4], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x19,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10219 + +umlsll za.d[w8, 4:7], {z16.h - z19.h}, {z16.h - z19.h} // 11000001-11110001-00000010-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z16.h - z19.h }, { z16.h - z19.h } +// CHECK-ENCODING: [0x19,0x02,0xf1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f10219 + +umlsll za.d[w8, 4:7, vgx4], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0019 + +umlsll za.d[w8, 4:7], {z0.h - z3.h}, {z28.h - z31.h} // 11000001-11111101-00000000-00011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z0.h - z3.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x19,0x00,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0019 + +umlsll za.d[w10, 0:3, vgx4], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00011000 +// CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54218 + +umlsll za.d[w10, 0:3], {z16.h - z19.h}, {z20.h - z23.h} // 11000001-11110101-01000010-00011000 +// 
CHECK-INST: umlsll za.d[w10, 0:3, vgx4], { z16.h - z19.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x18,0x42,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f54218 + +umlsll za.d[w8, 0:3, vgx4], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10198 + +umlsll za.d[w8, 0:3], {z12.h - z15.h}, {z0.h - z3.h} // 11000001-11100001-00000001-10011000 +// CHECK-INST: umlsll za.d[w8, 0:3, vgx4], { z12.h - z15.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x98,0x01,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e10198 + +umlsll za.d[w10, 4:7, vgx4], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94019 + +umlsll za.d[w10, 4:7], {z0.h - z3.h}, {z24.h - z27.h} // 11000001-11111001-01000000-00011001 +// CHECK-INST: umlsll za.d[w10, 4:7, vgx4], { z0.h - z3.h }, { z24.h - z27.h } +// CHECK-ENCODING: [0x19,0x40,0xf9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f94019 + +umlsll za.d[w8, 4:7, vgx4], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0299 + +umlsll za.d[w8, 4:7], {z20.h - z23.h}, {z28.h - z31.h} // 11000001-11111101-00000010-10011001 +// CHECK-INST: umlsll za.d[w8, 4:7, vgx4], { z20.h - z23.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x99,0x02,0xfd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fd0299 + +umlsll za.d[w11, 0:3, vgx4], {z8.h - z11.h}, 
{z0.h - z3.h} // 11000001-11100001-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16118 + +umlsll za.d[w11, 0:3], {z8.h - z11.h}, {z0.h - z3.h} // 11000001-11100001-01100001-00011000 +// CHECK-INST: umlsll za.d[w11, 0:3, vgx4], { z8.h - z11.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x18,0x61,0xe1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e16118 + +umlsll za.d[w9, 4:7, vgx4], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92199 + +umlsll za.d[w9, 4:7], {z12.h - z15.h}, {z8.h - z11.h} // 11000001-11101001-00100001-10011001 +// CHECK-INST: umlsll za.d[w9, 4:7, vgx4], { z12.h - z15.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x99,0x21,0xe9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e92199 + diff --git a/llvm/test/MC/AArch64/SME2/umopa-diagnostics.s b/llvm/test/MC/AArch64/SME2/umopa-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umopa-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +umopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umopa za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +umopa za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umopa za0.s, p0/z, p0/m, z0.s, z0.s 
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: umopa za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid suffixes + +umopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: umopa za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umopa za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umopa.s b/llvm/test/MC/AArch64/SME2/umopa.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umopa.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umopa za0.s, p0/m, p0/m, z0.h, z0.h // 10100001-10000000-00000000-00001000 +// CHECK-INST: umopa za0.s, p0/m, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x08,0x00,0x80,0xa1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: a1800008 + +umopa za1.s, p5/m, p2/m, z10.h, z21.h // 10100001-10010101-01010101-01001001 +// CHECK-INST: umopa za1.s, p5/m, p2/m, z10.h, z21.h +// CHECK-ENCODING: [0x49,0x55,0x95,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1955549 + +umopa za3.s, p3/m, p7/m, z13.h, z8.h // 10100001-10001000-11101101-10101011 +// CHECK-INST: umopa za3.s, p3/m, p7/m, z13.h, z8.h +// CHECK-ENCODING: [0xab,0xed,0x88,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a188edab + +umopa za3.s, p7/m, p7/m, z31.h, z31.h // 10100001-10011111-11111111-11101011 +// CHECK-INST: umopa za3.s, p7/m, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xeb,0xff,0x9f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19fffeb + +umopa za1.s, p3/m, p0/m, z17.h, z16.h // 10100001-10010000-00001110-00101001 +// CHECK-INST: umopa za1.s, p3/m, p0/m, z17.h, z16.h +// CHECK-ENCODING: [0x29,0x0e,0x90,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1900e29 + +umopa za1.s, p1/m, p4/m, z1.h, z30.h // 10100001-10011110-10000100-00101001 +// CHECK-INST: umopa za1.s, p1/m, p4/m, z1.h, z30.h +// CHECK-ENCODING: [0x29,0x84,0x9e,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19e8429 + +umopa za0.s, p5/m, p2/m, z19.h, z20.h // 10100001-10010100-01010110-01101000 +// CHECK-INST: umopa za0.s, p5/m, p2/m, z19.h, z20.h +// CHECK-ENCODING: [0x68,0x56,0x94,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1945668 + +umopa za0.s, p6/m, p0/m, z12.h, z2.h // 10100001-10000010-00011001-10001000 +// CHECK-INST: umopa za0.s, p6/m, p0/m, z12.h, z2.h +// CHECK-ENCODING: [0x88,0x19,0x82,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1821988 + +umopa za1.s, p2/m, p6/m, z1.h, z26.h // 10100001-10011010-11001000-00101001 +// CHECK-INST: umopa za1.s, p2/m, p6/m, z1.h, z26.h +// CHECK-ENCODING: [0x29,0xc8,0x9a,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
a19ac829 + +umopa za1.s, p2/m, p0/m, z22.h, z30.h // 10100001-10011110-00001010-11001001 +// CHECK-INST: umopa za1.s, p2/m, p0/m, z22.h, z30.h +// CHECK-ENCODING: [0xc9,0x0a,0x9e,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19e0ac9 + +umopa za2.s, p5/m, p7/m, z9.h, z1.h // 10100001-10000001-11110101-00101010 +// CHECK-INST: umopa za2.s, p5/m, p7/m, z9.h, z1.h +// CHECK-ENCODING: [0x2a,0xf5,0x81,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a181f52a + +umopa za3.s, p2/m, p5/m, z12.h, z11.h // 10100001-10001011-10101001-10001011 +// CHECK-INST: umopa za3.s, p2/m, p5/m, z12.h, z11.h +// CHECK-ENCODING: [0x8b,0xa9,0x8b,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a18ba98b + diff --git a/llvm/test/MC/AArch64/SME2/umops-diagnostics.s b/llvm/test/MC/AArch64/SME2/umops-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umops-diagnostics.s @@ -0,0 +1,35 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid tile + +umops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umops za8.s, p0/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate + +umops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: umops za0.s, p0/z, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate register, expected p0..p7 (without element suffix) +// CHECK-NEXT: umops za0.s, p15/m, p0/m, z0.s, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid suffixes + +umops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected za[0-3].s +// CHECK-NEXT: umops za0.d, p0/z, p0/m, z0.d, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +umops za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: za0.s, p0/m, p0/m, z0.s, z0.d +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/umops.s b/llvm/test/MC/AArch64/SME2/umops.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/umops.s @@ -0,0 +1,86 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +umops za0.s, p0/m, p0/m, z0.h, z0.h // 10100001-10000000-00000000-00011000 +// CHECK-INST: umops za0.s, p0/m, p0/m, z0.h, z0.h +// CHECK-ENCODING: [0x18,0x00,0x80,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1800018 + +umops za1.s, p5/m, p2/m, z10.h, z21.h // 10100001-10010101-01010101-01011001 +// CHECK-INST: umops za1.s, p5/m, p2/m, z10.h, z21.h +// CHECK-ENCODING: [0x59,0x55,0x95,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1955559 + +umops za3.s, p3/m, 
p7/m, z13.h, z8.h // 10100001-10001000-11101101-10111011 +// CHECK-INST: umops za3.s, p3/m, p7/m, z13.h, z8.h +// CHECK-ENCODING: [0xbb,0xed,0x88,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a188edbb + +umops za3.s, p7/m, p7/m, z31.h, z31.h // 10100001-10011111-11111111-11111011 +// CHECK-INST: umops za3.s, p7/m, p7/m, z31.h, z31.h +// CHECK-ENCODING: [0xfb,0xff,0x9f,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19ffffb + +umops za1.s, p3/m, p0/m, z17.h, z16.h // 10100001-10010000-00001110-00111001 +// CHECK-INST: umops za1.s, p3/m, p0/m, z17.h, z16.h +// CHECK-ENCODING: [0x39,0x0e,0x90,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1900e39 + +umops za1.s, p1/m, p4/m, z1.h, z30.h // 10100001-10011110-10000100-00111001 +// CHECK-INST: umops za1.s, p1/m, p4/m, z1.h, z30.h +// CHECK-ENCODING: [0x39,0x84,0x9e,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19e8439 + +umops za0.s, p5/m, p2/m, z19.h, z20.h // 10100001-10010100-01010110-01111000 +// CHECK-INST: umops za0.s, p5/m, p2/m, z19.h, z20.h +// CHECK-ENCODING: [0x78,0x56,0x94,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1945678 + +umops za0.s, p6/m, p0/m, z12.h, z2.h // 10100001-10000010-00011001-10011000 +// CHECK-INST: umops za0.s, p6/m, p0/m, z12.h, z2.h +// CHECK-ENCODING: [0x98,0x19,0x82,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a1821998 + +umops za1.s, p2/m, p6/m, z1.h, z26.h // 10100001-10011010-11001000-00111001 +// CHECK-INST: umops za1.s, p2/m, p6/m, z1.h, z26.h +// CHECK-ENCODING: [0x39,0xc8,0x9a,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19ac839 + +umops za1.s, p2/m, p0/m, z22.h, z30.h // 10100001-10011110-00001010-11011001 +// CHECK-INST: umops za1.s, p2/m, p0/m, z22.h, z30.h +// CHECK-ENCODING: [0xd9,0x0a,0x9e,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a19e0ad9 + +umops za2.s, p5/m, p7/m, z9.h, z1.h // 
10100001-10000001-11110101-00111010 +// CHECK-INST: umops za2.s, p5/m, p7/m, z9.h, z1.h +// CHECK-ENCODING: [0x3a,0xf5,0x81,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a181f53a + +umops za3.s, p2/m, p5/m, z12.h, z11.h // 10100001-10001011-10101001-10011011 +// CHECK-INST: umops za3.s, p2/m, p5/m, z12.h, z11.h +// CHECK-ENCODING: [0x9b,0xa9,0x8b,0xa1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: a18ba99b + diff --git a/llvm/test/MC/AArch64/SME2/uqrshr-diagnostics.s b/llvm/test/MC/AArch64/SME2/uqrshr-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uqrshr-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uqrshr z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uqrshr z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z0.h, {z10.s-z12.s}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uqrshr z0.h, {z10.s-z12.s}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: uqrshr z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z0.h, {z1.s-z2.s}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uqrshr z0.h, {z1.s-z2.s}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate + +uqrshr z31.h, {z28.d-z31.d}, #65 +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. +// CHECK-NEXT: uqrshr z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z31.h, {z28.s-z29.s}, #0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 16]. +// CHECK-NEXT: uqrshr z31.h, {z28.s-z29.s}, #0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z31.b, {z28.s-z31.s}, #33 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 32]. +// CHECK-NEXT: uqrshr z31.b, {z28.s-z31.s}, #33 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uqrshr z23.s, {z12.s-z15.s}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uqrshr z23.s, {z12.s-z15.s}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshr z23.b, {z12.d-z15.d}, #15 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uqrshr z23.b, {z12.d-z15.d}, #15 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uqrshr.s b/llvm/test/MC/AArch64/SME2/uqrshr.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uqrshr.s @@ -0,0 +1,88 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble 
-show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uqrshr z0.h, {z0.s - z1.s}, #16 // 11000001-11100000-11010100-00100000 +// CHECK-INST: uqrshr z0.h, { z0.s, z1.s }, #16 +// CHECK-ENCODING: [0x20,0xd4,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d420 + +uqrshr z21.h, {z10.s - z11.s}, #11 // 11000001-11100101-11010101-01110101 +// CHECK-INST: uqrshr z21.h, { z10.s, z11.s }, #11 +// CHECK-ENCODING: [0x75,0xd5,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5d575 + +uqrshr z23.h, {z12.s - z13.s}, #8 // 11000001-11101000-11010101-10110111 +// CHECK-INST: uqrshr z23.h, { z12.s, z13.s }, #8 +// CHECK-ENCODING: [0xb7,0xd5,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d5b7 + +uqrshr z31.h, {z30.s - z31.s}, #1 // 11000001-11101111-11010111-11111111 +// CHECK-INST: uqrshr z31.h, { z30.s, z31.s }, #1 +// CHECK-ENCODING: [0xff,0xd7,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efd7ff + + +uqrshr z0.b, {z0.s - z3.s}, #32 // 11000001-01100000-11011000-00100000 +// CHECK-INST: uqrshr z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x20,0xd8,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d820 + +uqrshr z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011001-00110101 +// CHECK-INST: uqrshr z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x35,0xd9,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d935 + +uqrshr z23.b, {z12.s - z15.s}, #24 // 11000001-01101000-11011001-10110111 +// CHECK-INST: uqrshr z23.b, { z12.s - z15.s }, #24 +// CHECK-ENCODING: [0xb7,0xd9,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d9b7 + +uqrshr z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011011-10111111 +// CHECK-INST: uqrshr z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0xbf,0xdb,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c17fdbbf + + +uqrshr z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011000-00100000 +// CHECK-INST: uqrshr z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x20,0xd8,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d820 + +uqrshr z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011001-00110101 +// CHECK-INST: uqrshr z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x35,0xd9,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d935 + +uqrshr z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011001-10110111 +// CHECK-INST: uqrshr z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0xb7,0xd9,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d9b7 + +uqrshr z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011011-10111111 +// CHECK-INST: uqrshr z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0xbf,0xdb,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdbbf + diff --git a/llvm/test/MC/AArch64/SME2/uqrshrn-diagnostics.s b/llvm/test/MC/AArch64/SME2/uqrshrn-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uqrshrn-diagnostics.s @@ -0,0 +1,30 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uqrshrn z0.b, {z0.s-z4.s}, #32 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uqrshrn z0.b, {z0.s-z4.s}, #32 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uqrshrn z0.h, {z1.d-z4.d}, #1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: uqrshrn z0.h, {z1.d-z4.d}, #1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid 
immediate + +uqrshrn z31.h, {z28.d-z31.d}, #65 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [1, 64]. +// CHECK-NEXT: uqrshrn z31.h, {z28.d-z31.d}, #65 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uqrshrn z23.s, {z12.s-z15.s}, #24 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid element width +// CHECK-NEXT: uqrshrn z23.s, {z12.s-z15.s}, #24 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uqrshrn.s b/llvm/test/MC/AArch64/SME2/uqrshrn.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uqrshrn.s @@ -0,0 +1,63 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uqrshrn z0.b, {z0.s - z3.s}, #32 // 11000001-01100000-11011100-00100000 +// CHECK-INST: uqrshrn z0.b, { z0.s - z3.s }, #32 +// CHECK-ENCODING: [0x20,0xdc,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160dc20 + +uqrshrn z21.b, {z8.s - z11.s}, #11 // 11000001-01110101-11011101-00110101 +// CHECK-INST: uqrshrn z21.b, { z8.s - z11.s }, #11 +// CHECK-ENCODING: [0x35,0xdd,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c175dd35 + +uqrshrn z23.b, {z12.s - z15.s}, #24 // 11000001-01101000-11011101-10110111 +// CHECK-INST: uqrshrn z23.b, { z12.s - z15.s }, #24 +// CHECK-ENCODING: [0xb7,0xdd,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168ddb7 + +uqrshrn z31.b, {z28.s - z31.s}, #1 // 11000001-01111111-11011111-10111111 +// CHECK-INST: uqrshrn z31.b, { z28.s - z31.s }, #1 +// CHECK-ENCODING: [0xbf,0xdf,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fdfbf + + +uqrshrn z0.h, {z0.d - z3.d}, #64 // 11000001-10100000-11011100-00100000 +// CHECK-INST: uqrshrn z0.h, { z0.d - z3.d }, #64 +// CHECK-ENCODING: [0x20,0xdc,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0dc20 + +uqrshrn z21.h, {z8.d - z11.d}, #11 // 11000001-11110101-11011101-00110101 +// CHECK-INST: uqrshrn z21.h, { z8.d - z11.d }, #11 +// CHECK-ENCODING: [0x35,0xdd,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5dd35 + +uqrshrn z23.h, {z12.d - z15.d}, #24 // 11000001-11101000-11011101-10110111 +// CHECK-INST: uqrshrn z23.h, { z12.d - z15.d }, #24 +// CHECK-ENCODING: [0xb7,0xdd,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8ddb7 + +uqrshrn z31.h, {z28.d - z31.d}, #1 // 11000001-11111111-11011111-10111111 +// CHECK-INST: uqrshrn z31.h, { z28.d - z31.d }, #1 +// CHECK-ENCODING: [0xbf,0xdf,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffdfbf + diff --git a/llvm/test/MC/AArch64/SME2/urshl-diagnostics.s b/llvm/test/MC/AArch64/SME2/urshl-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/urshl-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +urshl {z0.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction 
+// CHECK-NEXT: urshl {z0.h-z2.h}, {z0.h-z1.h}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urshl {z0.s-z1.s}, {z2.s-z4.s}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: urshl {z0.s-z1.s}, {z2.s-z4.s}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urshl {z20.d-z23.d}, {z20.d-z23.d}, {z8.d-z12.d} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: urshl {z20.d-z23.d}, {z20.d-z23.d}, {z8.d-z12.d} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urshl {z29.b-z30.b}, {z30.b-z31.b}, z15.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: urshl {z29.b-z30.b}, {z30.b-z31.b}, z15.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urshl {z20.h-z23.h}, {z21.h-z24.h}, {z8.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: urshl {z20.h-z23.h}, {z21.h-z24.h}, {z8.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +urshl {z28.b-z31.b}, {z28.b-z31.b}, {z27.b-z30.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: urshl {z28.b-z31.b}, {z28.b-z31.b}, {z27.b-z30.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Single Register + +urshl {z20.h-z21.h}, {z20.h-z21.h}, z16.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.h..z15.h +// CHECK-NEXT: urshl {z20.h-z21.h}, {z20.h-z21.h}, z16.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +urshl 
{z0.d-z3.d}, {z0.d-z3.d}, z0.s +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.d..z15.d +// CHECK-NEXT: urshl {z0.d-z3.d}, {z0.d-z3.d}, z0.s +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/urshl.s b/llvm/test/MC/AArch64/SME2/urshl.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/urshl.s @@ -0,0 +1,414 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +urshl {z0.h, z1.h}, {z0.h, z1.h}, z0.h // 11000001-01100000-10100010-00100001 +// CHECK-INST: urshl { z0.h, z1.h }, { z0.h, z1.h }, z0.h +// CHECK-ENCODING: [0x21,0xa2,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160a221 + +urshl {z20.h, z21.h}, {z20.h, z21.h}, z5.h // 11000001-01100101-10100010-00110101 +// CHECK-INST: urshl { z20.h, z21.h }, { z20.h, z21.h }, z5.h +// CHECK-ENCODING: [0x35,0xa2,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165a235 + +urshl {z22.h, z23.h}, {z22.h, z23.h}, z8.h // 11000001-01101000-10100010-00110111 +// CHECK-INST: urshl { z22.h, z23.h }, { z22.h, z23.h }, z8.h +// CHECK-ENCODING: [0x37,0xa2,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168a237 + +urshl 
{z30.h, z31.h}, {z30.h, z31.h}, z15.h // 11000001-01101111-10100010-00111111 +// CHECK-INST: urshl { z30.h, z31.h }, { z30.h, z31.h }, z15.h +// CHECK-ENCODING: [0x3f,0xa2,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16fa23f + + +urshl {z0.h, z1.h}, {z0.h, z1.h}, {z0.h, z1.h} // 11000001-01100000-10110010-00100001 +// CHECK-INST: urshl { z0.h, z1.h }, { z0.h, z1.h }, { z0.h, z1.h } +// CHECK-ENCODING: [0x21,0xb2,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160b221 + +urshl {z20.h, z21.h}, {z20.h, z21.h}, {z20.h, z21.h} // 11000001-01110100-10110010-00110101 +// CHECK-INST: urshl { z20.h, z21.h }, { z20.h, z21.h }, { z20.h, z21.h } +// CHECK-ENCODING: [0x35,0xb2,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174b235 + +urshl {z22.h, z23.h}, {z22.h, z23.h}, {z8.h, z9.h} // 11000001-01101000-10110010-00110111 +// CHECK-INST: urshl { z22.h, z23.h }, { z22.h, z23.h }, { z8.h, z9.h } +// CHECK-ENCODING: [0x37,0xb2,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168b237 + +urshl {z30.h, z31.h}, {z30.h, z31.h}, {z30.h, z31.h} // 11000001-01111110-10110010-00111111 +// CHECK-INST: urshl { z30.h, z31.h }, { z30.h, z31.h }, { z30.h, z31.h } +// CHECK-ENCODING: [0x3f,0xb2,0x7e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17eb23f + + +urshl {z0.s, z1.s}, {z0.s, z1.s}, z0.s // 11000001-10100000-10100010-00100001 +// CHECK-INST: urshl { z0.s, z1.s }, { z0.s, z1.s }, z0.s +// CHECK-ENCODING: [0x21,0xa2,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0a221 + +urshl {z20.s, z21.s}, {z20.s, z21.s}, z5.s // 11000001-10100101-10100010-00110101 +// CHECK-INST: urshl { z20.s, z21.s }, { z20.s, z21.s }, z5.s +// CHECK-ENCODING: [0x35,0xa2,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5a235 + +urshl {z22.s, z23.s}, {z22.s, z23.s}, z8.s // 11000001-10101000-10100010-00110111 +// CHECK-INST: 
urshl { z22.s, z23.s }, { z22.s, z23.s }, z8.s +// CHECK-ENCODING: [0x37,0xa2,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8a237 + +urshl {z30.s, z31.s}, {z30.s, z31.s}, z15.s // 11000001-10101111-10100010-00111111 +// CHECK-INST: urshl { z30.s, z31.s }, { z30.s, z31.s }, z15.s +// CHECK-ENCODING: [0x3f,0xa2,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afa23f + + +urshl {z0.s, z1.s}, {z0.s, z1.s}, {z0.s, z1.s} // 11000001-10100000-10110010-00100001 +// CHECK-INST: urshl { z0.s, z1.s }, { z0.s, z1.s }, { z0.s, z1.s } +// CHECK-ENCODING: [0x21,0xb2,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0b221 + +urshl {z20.s, z21.s}, {z20.s, z21.s}, {z20.s, z21.s} // 11000001-10110100-10110010-00110101 +// CHECK-INST: urshl { z20.s, z21.s }, { z20.s, z21.s }, { z20.s, z21.s } +// CHECK-ENCODING: [0x35,0xb2,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4b235 + +urshl {z22.s, z23.s}, {z22.s, z23.s}, {z8.s, z9.s} // 11000001-10101000-10110010-00110111 +// CHECK-INST: urshl { z22.s, z23.s }, { z22.s, z23.s }, { z8.s, z9.s } +// CHECK-ENCODING: [0x37,0xb2,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8b237 + +urshl {z30.s, z31.s}, {z30.s, z31.s}, {z30.s, z31.s} // 11000001-10111110-10110010-00111111 +// CHECK-INST: urshl { z30.s, z31.s }, { z30.s, z31.s }, { z30.s, z31.s } +// CHECK-ENCODING: [0x3f,0xb2,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1beb23f + + +urshl {z0.d, z1.d}, {z0.d, z1.d}, z0.d // 11000001-11100000-10100010-00100001 +// CHECK-INST: urshl { z0.d, z1.d }, { z0.d, z1.d }, z0.d +// CHECK-ENCODING: [0x21,0xa2,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0a221 + +urshl {z20.d, z21.d}, {z20.d, z21.d}, z5.d // 11000001-11100101-10100010-00110101 +// CHECK-INST: urshl { z20.d, z21.d }, { z20.d, z21.d }, z5.d +// CHECK-ENCODING: [0x35,0xa2,0xe5,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5a235 + +urshl {z22.d, z23.d}, {z22.d, z23.d}, z8.d // 11000001-11101000-10100010-00110111 +// CHECK-INST: urshl { z22.d, z23.d }, { z22.d, z23.d }, z8.d +// CHECK-ENCODING: [0x37,0xa2,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8a237 + +urshl {z30.d, z31.d}, {z30.d, z31.d}, z15.d // 11000001-11101111-10100010-00111111 +// CHECK-INST: urshl { z30.d, z31.d }, { z30.d, z31.d }, z15.d +// CHECK-ENCODING: [0x3f,0xa2,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efa23f + + +urshl {z0.d, z1.d}, {z0.d, z1.d}, {z0.d, z1.d} // 11000001-11100000-10110010-00100001 +// CHECK-INST: urshl { z0.d, z1.d }, { z0.d, z1.d }, { z0.d, z1.d } +// CHECK-ENCODING: [0x21,0xb2,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0b221 + +urshl {z20.d, z21.d}, {z20.d, z21.d}, {z20.d, z21.d} // 11000001-11110100-10110010-00110101 +// CHECK-INST: urshl { z20.d, z21.d }, { z20.d, z21.d }, { z20.d, z21.d } +// CHECK-ENCODING: [0x35,0xb2,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4b235 + +urshl {z22.d, z23.d}, {z22.d, z23.d}, {z8.d, z9.d} // 11000001-11101000-10110010-00110111 +// CHECK-INST: urshl { z22.d, z23.d }, { z22.d, z23.d }, { z8.d, z9.d } +// CHECK-ENCODING: [0x37,0xb2,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8b237 + +urshl {z30.d, z31.d}, {z30.d, z31.d}, {z30.d, z31.d} // 11000001-11111110-10110010-00111111 +// CHECK-INST: urshl { z30.d, z31.d }, { z30.d, z31.d }, { z30.d, z31.d } +// CHECK-ENCODING: [0x3f,0xb2,0xfe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1feb23f + + +urshl {z0.b, z1.b}, {z0.b, z1.b}, z0.b // 11000001-00100000-10100010-00100001 +// CHECK-INST: urshl { z0.b, z1.b }, { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x21,0xa2,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120a221 + +urshl {z20.b, z21.b}, 
{z20.b, z21.b}, z5.b // 11000001-00100101-10100010-00110101 +// CHECK-INST: urshl { z20.b, z21.b }, { z20.b, z21.b }, z5.b +// CHECK-ENCODING: [0x35,0xa2,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125a235 + +urshl {z22.b, z23.b}, {z22.b, z23.b}, z8.b // 11000001-00101000-10100010-00110111 +// CHECK-INST: urshl { z22.b, z23.b }, { z22.b, z23.b }, z8.b +// CHECK-ENCODING: [0x37,0xa2,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128a237 + +urshl {z30.b, z31.b}, {z30.b, z31.b}, z15.b // 11000001-00101111-10100010-00111111 +// CHECK-INST: urshl { z30.b, z31.b }, { z30.b, z31.b }, z15.b +// CHECK-ENCODING: [0x3f,0xa2,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12fa23f + + +urshl {z0.b, z1.b}, {z0.b, z1.b}, {z0.b, z1.b} // 11000001-00100000-10110010-00100001 +// CHECK-INST: urshl { z0.b, z1.b }, { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x21,0xb2,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120b221 + +urshl {z20.b, z21.b}, {z20.b, z21.b}, {z20.b, z21.b} // 11000001-00110100-10110010-00110101 +// CHECK-INST: urshl { z20.b, z21.b }, { z20.b, z21.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x35,0xb2,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134b235 + +urshl {z22.b, z23.b}, {z22.b, z23.b}, {z8.b, z9.b} // 11000001-00101000-10110010-00110111 +// CHECK-INST: urshl { z22.b, z23.b }, { z22.b, z23.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x37,0xb2,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128b237 + +urshl {z30.b, z31.b}, {z30.b, z31.b}, {z30.b, z31.b} // 11000001-00111110-10110010-00111111 +// CHECK-INST: urshl { z30.b, z31.b }, { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x3f,0xb2,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13eb23f + + +urshl {z0.h - z3.h}, {z0.h - z3.h}, z0.h // 11000001-01100000-10101010-00100001 +// CHECK-INST: urshl { z0.h 
- z3.h }, { z0.h - z3.h }, z0.h +// CHECK-ENCODING: [0x21,0xaa,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160aa21 + +urshl {z20.h - z23.h}, {z20.h - z23.h}, z5.h // 11000001-01100101-10101010-00110101 +// CHECK-INST: urshl { z20.h - z23.h }, { z20.h - z23.h }, z5.h +// CHECK-ENCODING: [0x35,0xaa,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165aa35 + +urshl {z20.h - z23.h}, {z20.h - z23.h}, z8.h // 11000001-01101000-10101010-00110101 +// CHECK-INST: urshl { z20.h - z23.h }, { z20.h - z23.h }, z8.h +// CHECK-ENCODING: [0x35,0xaa,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168aa35 + +urshl {z28.h - z31.h}, {z28.h - z31.h}, z15.h // 11000001-01101111-10101010-00111101 +// CHECK-INST: urshl { z28.h - z31.h }, { z28.h - z31.h }, z15.h +// CHECK-ENCODING: [0x3d,0xaa,0x6f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c16faa3d + + +urshl {z0.h - z3.h}, {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01100000-10111010-00100001 +// CHECK-INST: urshl { z0.h - z3.h }, { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x21,0xba,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160ba21 + +urshl {z20.h - z23.h}, {z20.h - z23.h}, {z20.h - z23.h} // 11000001-01110100-10111010-00110101 +// CHECK-INST: urshl { z20.h - z23.h }, { z20.h - z23.h }, { z20.h - z23.h } +// CHECK-ENCODING: [0x35,0xba,0x74,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c174ba35 + +urshl {z20.h - z23.h}, {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01101000-10111010-00110101 +// CHECK-INST: urshl { z20.h - z23.h }, { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x35,0xba,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168ba35 + +urshl {z28.h - z31.h}, {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01111100-10111010-00111101 +// CHECK-INST: urshl { z28.h - z31.h }, { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: 
[0x3d,0xba,0x7c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17cba3d + + +urshl {z0.s - z3.s}, {z0.s - z3.s}, z0.s // 11000001-10100000-10101010-00100001 +// CHECK-INST: urshl { z0.s - z3.s }, { z0.s - z3.s }, z0.s +// CHECK-ENCODING: [0x21,0xaa,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0aa21 + +urshl {z20.s - z23.s}, {z20.s - z23.s}, z5.s // 11000001-10100101-10101010-00110101 +// CHECK-INST: urshl { z20.s - z23.s }, { z20.s - z23.s }, z5.s +// CHECK-ENCODING: [0x35,0xaa,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5aa35 + +urshl {z20.s - z23.s}, {z20.s - z23.s}, z8.s // 11000001-10101000-10101010-00110101 +// CHECK-INST: urshl { z20.s - z23.s }, { z20.s - z23.s }, z8.s +// CHECK-ENCODING: [0x35,0xaa,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8aa35 + +urshl {z28.s - z31.s}, {z28.s - z31.s}, z15.s // 11000001-10101111-10101010-00111101 +// CHECK-INST: urshl { z28.s - z31.s }, { z28.s - z31.s }, z15.s +// CHECK-ENCODING: [0x3d,0xaa,0xaf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1afaa3d + + +urshl {z0.s - z3.s}, {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10100000-10111010-00100001 +// CHECK-INST: urshl { z0.s - z3.s }, { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x21,0xba,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0ba21 + +urshl {z20.s - z23.s}, {z20.s - z23.s}, {z20.s - z23.s} // 11000001-10110100-10111010-00110101 +// CHECK-INST: urshl { z20.s - z23.s }, { z20.s - z23.s }, { z20.s - z23.s } +// CHECK-ENCODING: [0x35,0xba,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4ba35 + +urshl {z20.s - z23.s}, {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10101000-10111010-00110101 +// CHECK-INST: urshl { z20.s - z23.s }, { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x35,0xba,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1a8ba35 + +urshl {z28.s - z31.s}, {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10111100-10111010-00111101 +// CHECK-INST: urshl { z28.s - z31.s }, { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x3d,0xba,0xbc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bcba3d + + +urshl {z0.d - z3.d}, {z0.d - z3.d}, z0.d // 11000001-11100000-10101010-00100001 +// CHECK-INST: urshl { z0.d - z3.d }, { z0.d - z3.d }, z0.d +// CHECK-ENCODING: [0x21,0xaa,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0aa21 + +urshl {z20.d - z23.d}, {z20.d - z23.d}, z5.d // 11000001-11100101-10101010-00110101 +// CHECK-INST: urshl { z20.d - z23.d }, { z20.d - z23.d }, z5.d +// CHECK-ENCODING: [0x35,0xaa,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5aa35 + +urshl {z20.d - z23.d}, {z20.d - z23.d}, z8.d // 11000001-11101000-10101010-00110101 +// CHECK-INST: urshl { z20.d - z23.d }, { z20.d - z23.d }, z8.d +// CHECK-ENCODING: [0x35,0xaa,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8aa35 + +urshl {z28.d - z31.d}, {z28.d - z31.d}, z15.d // 11000001-11101111-10101010-00111101 +// CHECK-INST: urshl { z28.d - z31.d }, { z28.d - z31.d }, z15.d +// CHECK-ENCODING: [0x3d,0xaa,0xef,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1efaa3d + + +urshl {z0.d - z3.d}, {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11100000-10111010-00100001 +// CHECK-INST: urshl { z0.d - z3.d }, { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x21,0xba,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0ba21 + +urshl {z20.d - z23.d}, {z20.d - z23.d}, {z20.d - z23.d} // 11000001-11110100-10111010-00110101 +// CHECK-INST: urshl { z20.d - z23.d }, { z20.d - z23.d }, { z20.d - z23.d } +// CHECK-ENCODING: [0x35,0xba,0xf4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f4ba35 + +urshl {z20.d - z23.d}, {z20.d - z23.d}, {z8.d - 
z11.d} // 11000001-11101000-10111010-00110101 +// CHECK-INST: urshl { z20.d - z23.d }, { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x35,0xba,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8ba35 + +urshl {z28.d - z31.d}, {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11111100-10111010-00111101 +// CHECK-INST: urshl { z28.d - z31.d }, { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x3d,0xba,0xfc,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1fcba3d + + +urshl {z0.b - z3.b}, {z0.b - z3.b}, z0.b // 11000001-00100000-10101010-00100001 +// CHECK-INST: urshl { z0.b - z3.b }, { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x21,0xaa,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120aa21 + +urshl {z20.b - z23.b}, {z20.b - z23.b}, z5.b // 11000001-00100101-10101010-00110101 +// CHECK-INST: urshl { z20.b - z23.b }, { z20.b - z23.b }, z5.b +// CHECK-ENCODING: [0x35,0xaa,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125aa35 + +urshl {z20.b - z23.b}, {z20.b - z23.b}, z8.b // 11000001-00101000-10101010-00110101 +// CHECK-INST: urshl { z20.b - z23.b }, { z20.b - z23.b }, z8.b +// CHECK-ENCODING: [0x35,0xaa,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128aa35 + +urshl {z28.b - z31.b}, {z28.b - z31.b}, z15.b // 11000001-00101111-10101010-00111101 +// CHECK-INST: urshl { z28.b - z31.b }, { z28.b - z31.b }, z15.b +// CHECK-ENCODING: [0x3d,0xaa,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12faa3d + + +urshl {z0.b - z3.b}, {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00100000-10111010-00100001 +// CHECK-INST: urshl { z0.b - z3.b }, { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x21,0xba,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120ba21 + +urshl {z20.b - z23.b}, {z20.b - z23.b}, {z20.b - z23.b} // 11000001-00110100-10111010-00110101 +// CHECK-INST: urshl { z20.b - 
z23.b }, { z20.b - z23.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x35,0xba,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c134ba35 + +urshl {z20.b - z23.b}, {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00101000-10111010-00110101 +// CHECK-INST: urshl { z20.b - z23.b }, { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x35,0xba,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128ba35 + +urshl {z28.b - z31.b}, {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00111100-10111010-00111101 +// CHECK-INST: urshl { z28.b - z31.b }, { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x3d,0xba,0x3c,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13cba3d + + diff --git a/llvm/test/MC/AArch64/SME2/usdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/usdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/usdot-diagnostics.s @@ -0,0 +1,60 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid select register + +usdot za.s[w7, 0, vgx2], {z0.b-z1.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: usdot za.s[w7, 0, vgx2], {z0.b-z1.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usdot za.s[w12, 0, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: usdot za.s[w12, 0, vgx4], {z0.b-z3.b}, z0.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid select offset + +usdot za.s[w8, 16], {z0.b-z1.b}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. 
+// CHECK-NEXT: usdot za.s[w8, 16], {z0.b-z1.b}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Out of range element index + +usdot za.s[w8, 0], {z0.b-z1.b}, z0.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: usdot za.s[w8, 0], {z0.b-z1.b}, z0.b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usdot za.s[w8, 0], {z0.b-z3.b}, z0.b[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: usdot za.s[w8, 0], {z0.b-z3.b}, z0.b[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// ZPR range constraint + +usdot za.s[w8, 5], {z0.b-z1.b}, z16.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usdot za.s[w8, 5], {z0.b-z1.b}, z16.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usdot za.s[w8, 5], {z0.b-z3.b}, z16.b[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usdot za.s[w8, 5], {z0.b-z3.b}, z16.b[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// USDOT (multi-single) + +usdot za.s[w8, 5], {z0.b-z1.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usdot za.s[w8, 5], {z0.b-z1.b}, z16.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usdot za.s[w8, 5], {z0.b-z3.b}, z16.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usdot za.s[w8, 5], {z0.b-z3.b}, z16.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/usdot.s b/llvm/test/MC/AArch64/SME2/usdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/usdot.s @@ -0,0 +1,883 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +usdot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201408 + +usdot za.s[w8, 0], {z0.b, z1.b}, z0.b // 11000001-00100000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x08,0x14,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1201408 + +usdot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x4d,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125554d + +usdot za.s[w10, 5], {z10.b, z11.b}, z5.b // 11000001-00100101-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x4d,0x55,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c125554d + +usdot za.s[w11, 7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xaf,0x75,0x28,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c12875af + +usdot za.s[w11, 7], {z13.b, z14.b}, z8.b // 11000001-00101000-01110101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xaf,0x75,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12875af + +usdot za.s[w11, 7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xef,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77ef + +usdot za.s[w11, 7], {z31.b, z0.b}, z15.b // 11000001-00101111-01110111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xef,0x77,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f77ef + +usdot za.s[w8, 5, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x2d,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120162d + +usdot za.s[w8, 5], {z17.b, z18.b}, z0.b // 11000001-00100000-00010110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x2d,0x16,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120162d + +usdot za.s[w8, 1, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1429 + +usdot za.s[w8, 1], {z1.b, z2.b}, z14.b // 11000001-00101110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x29,0x14,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e1429 + +usdot za.s[w10, 0, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01101000 +// CHECK-INST: 
usdot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245668 + +usdot za.s[w10, 0], {z19.b, z20.b}, z4.b // 11000001-00100100-01010110-01101000 +// CHECK-INST: usdot za.s[w10, 0, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x68,0x56,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1245668 + +usdot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221588 + +usdot za.s[w8, 0], {z12.b, z13.b}, z2.b // 11000001-00100010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x88,0x15,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1221588 + +usdot za.s[w10, 1, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5429 + +usdot za.s[w10, 1], {z1.b, z2.b}, z10.b // 11000001-00101010-01010100-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x29,0x54,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a5429 + +usdot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xcd,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e16cd + +usdot za.s[w8, 5], {z22.b, z23.b}, z14.b // 11000001-00101110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xcd,0x16,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c12e16cd + +usdot za.s[w11, 2, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x2a,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121752a + +usdot za.s[w11, 2], {z9.b, z10.b}, z1.b // 11000001-00100001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x2a,0x75,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c121752a + +usdot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x8f,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b358f + +usdot za.s[w9, 7], {z12.b, z13.b}, z11.b // 11000001-00101011-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x8f,0x35,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b358f + + +usdot za.s[w8, 0, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00101000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x28,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501028 + +usdot za.s[w8, 0], {z0.b, z1.b}, z0.b[0] // 11000001-01010000-00010000-00101000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x28,0x10,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501028 + +usdot za.s[w10, 5, vgx2], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01101101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], { z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x6d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155556d + +usdot za.s[w10, 5], {z10.b, z11.b}, z5.b[1] // 11000001-01010101-01010101-01101101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], 
{ z10.b, z11.b }, z5.b[1] +// CHECK-ENCODING: [0x6d,0x55,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155556d + +usdot za.s[w11, 7, vgx2], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xaf,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587daf + +usdot za.s[w11, 7], {z12.b, z13.b}, z8.b[3] // 11000001-01011000-01111101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z12.b, z13.b }, z8.b[3] +// CHECK-ENCODING: [0xaf,0x7d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1587daf + +usdot za.s[w11, 7, vgx2], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xef,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fef + +usdot za.s[w11, 7], {z30.b, z31.b}, z15.b[3] // 11000001-01011111-01111111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z30.b, z31.b }, z15.b[3] +// CHECK-ENCODING: [0xef,0x7f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f7fef + +usdot za.s[w8, 5, vgx2], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x2d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e2d + +usdot za.s[w8, 5], {z16.b, z17.b}, z0.b[3] // 11000001-01010000-00011110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z16.b, z17.b }, z0.b[3] +// CHECK-ENCODING: [0x2d,0x1e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1501e2d + +usdot za.s[w8, 1, vgx2], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x29,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction 
requires: sme2 +// CHECK-UNKNOWN: c15e1429 + +usdot za.s[w8, 1], {z0.b, z1.b}, z14.b[1] // 11000001-01011110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z0.b, z1.b }, z14.b[1] +// CHECK-ENCODING: [0x29,0x14,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1429 + +usdot za.s[w10, 0, vgx2], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01101000 +// CHECK-INST: usdot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x68,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545668 + +usdot za.s[w10, 0], {z18.b, z19.b}, z4.b[1] // 11000001-01010100-01010110-01101000 +// CHECK-INST: usdot za.s[w10, 0, vgx2], { z18.b, z19.b }, z4.b[1] +// CHECK-ENCODING: [0x68,0x56,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1545668 + +usdot za.s[w8, 0, vgx2], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10101000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xa8,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219a8 + +usdot za.s[w8, 0], {z12.b, z13.b}, z2.b[2] // 11000001-01010010-00011001-10101000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, z2.b[2] +// CHECK-ENCODING: [0xa8,0x19,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15219a8 + +usdot za.s[w10, 1, vgx2], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x29,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5829 + +usdot za.s[w10, 1], {z0.b, z1.b}, z10.b[2] // 11000001-01011010-01011000-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z0.b, z1.b }, z10.b[2] +// CHECK-ENCODING: [0x29,0x58,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a5829 + +usdot za.s[w8, 5, vgx2], {z22.b, z23.b}, z14.b[2] // 
11000001-01011110-00011010-11101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xed,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1aed + +usdot za.s[w8, 5], {z22.b, z23.b}, z14.b[2] // 11000001-01011110-00011010-11101101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, z14.b[2] +// CHECK-ENCODING: [0xed,0x1a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e1aed + +usdot za.s[w11, 2, vgx2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x2a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151752a + +usdot za.s[w11, 2], {z8.b, z9.b}, z1.b[1] // 11000001-01010001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z8.b, z9.b }, z1.b[1] +// CHECK-ENCODING: [0x2a,0x75,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151752a + +usdot za.s[w9, 7, vgx2], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10101111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xaf,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39af + +usdot za.s[w9, 7], {z12.b, z13.b}, z11.b[2] // 11000001-01011011-00111001-10101111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, z11.b[2] +// CHECK-ENCODING: [0xaf,0x39,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b39af + + +usdot za.s[w8, 0, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x08,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01408 + +usdot za.s[w8, 0], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z0.b, z1.b }, { z0.b, 
z1.b } +// CHECK-ENCODING: [0x08,0x14,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a01408 + +usdot za.s[w10, 5, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x4d,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4554d + +usdot za.s[w10, 5], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x4d,0x55,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b4554d + +usdot za.s[w11, 7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01110101-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x8f,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8758f + +usdot za.s[w11, 7], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01110101-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x8f,0x75,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8758f + +usdot za.s[w11, 7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01110111-11001111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xcf,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77cf + +usdot za.s[w11, 7], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01110111-11001111 +// CHECK-INST: usdot za.s[w11, 7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xcf,0x77,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be77cf + +usdot za.s[w8, 5, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00010110-00001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z16.b, z17.b }, { 
z16.b, z17.b } +// CHECK-ENCODING: [0x0d,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b0160d + +usdot za.s[w8, 5], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00010110-00001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x0d,0x16,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b0160d + +usdot za.s[w8, 1, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00010100-00001001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1409 + +usdot za.s[w8, 1], {z0.b, z1.b}, {z30.b, z31.b} // 11000001-10111110-00010100-00001001 +// CHECK-INST: usdot za.s[w8, 1, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x09,0x14,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be1409 + +usdot za.s[w10, 0, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01010110-01001000 +// CHECK-INST: usdot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x48,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45648 + +usdot za.s[w10, 0], {z18.b, z19.b}, {z20.b, z21.b} // 11000001-10110100-01010110-01001000 +// CHECK-INST: usdot za.s[w10, 0, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x48,0x56,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b45648 + +usdot za.s[w8, 0, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x88,0x15,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21588 + +usdot za.s[w8, 0], {z12.b, z13.b}, {z2.b, z3.b} // 11000001-10100010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// 
CHECK-ENCODING: [0x88,0x15,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a21588 + +usdot za.s[w10, 1, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01010100-00001001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5409 + +usdot za.s[w10, 1], {z0.b, z1.b}, {z26.b, z27.b} // 11000001-10111010-01010100-00001001 +// CHECK-INST: usdot za.s[w10, 1, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x09,0x54,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba5409 + +usdot za.s[w8, 5, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xcd,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16cd + +usdot za.s[w8, 5], {z22.b, z23.b}, {z30.b, z31.b} // 11000001-10111110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xcd,0x16,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be16cd + +usdot za.s[w11, 2, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01110101-00001010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x0a,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0750a + +usdot za.s[w11, 2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001-10100000-01110101-00001010 +// CHECK-INST: usdot za.s[w11, 2, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x0a,0x75,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0750a + +usdot za.s[w9, 7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: 
[0x8f,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa358f + +usdot za.s[w9, 7], {z12.b, z13.b}, {z10.b, z11.b} // 11000001-10101010-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x8f,0x35,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa358f + + +usdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x08,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301408 + +usdot za.s[w8, 0], {z0.b - z3.b}, z0.b // 11000001-00110000-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x08,0x14,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1301408 + +usdot za.s[w10, 5, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x4d,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135554d + +usdot za.s[w10, 5], {z10.b - z13.b}, z5.b // 11000001-00110101-01010101-01001101 +// CHECK-INST: usdot za.s[w10, 5, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x4d,0x55,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135554d + +usdot za.s[w11, 7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xaf,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875af + +usdot za.s[w11, 7], {z13.b - z16.b}, z8.b // 11000001-00111000-01110101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xaf,0x75,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13875af + +usdot za.s[w11, 7, vgx4], 
{z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xef,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77ef + +usdot za.s[w11, 7], {z31.b - z2.b}, z15.b // 11000001-00111111-01110111-11101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xef,0x77,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f77ef + +usdot za.s[w8, 5, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x2d,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c130162d + +usdot za.s[w8, 5], {z17.b - z20.b}, z0.b // 11000001-00110000-00010110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x2d,0x16,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c130162d + +usdot za.s[w8, 1, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1429 + +usdot za.s[w8, 1], {z1.b - z4.b}, z14.b // 11000001-00111110-00010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x29,0x14,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e1429 + +usdot za.s[w10, 0, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01101000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x68,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345668 + +usdot za.s[w10, 0], {z19.b - z22.b}, z4.b // 11000001-00110100-01010110-01101000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z19.b - z22.b }, 
z4.b +// CHECK-ENCODING: [0x68,0x56,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1345668 + +usdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321588 + +usdot za.s[w8, 0], {z12.b - z15.b}, z2.b // 11000001-00110010-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x88,0x15,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1321588 + +usdot za.s[w10, 1, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5429 + +usdot za.s[w10, 1], {z1.b - z4.b}, z10.b // 11000001-00111010-01010100-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x29,0x54,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a5429 + +usdot za.s[w8, 5, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xcd,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16cd + +usdot za.s[w8, 5], {z22.b - z25.b}, z14.b // 11000001-00111110-00010110-11001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xcd,0x16,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e16cd + +usdot za.s[w11, 2, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x2a,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131752a + +usdot za.s[w11, 
2], {z9.b - z12.b}, z1.b // 11000001-00110001-01110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x2a,0x75,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c131752a + +usdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x8f,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b358f + +usdot za.s[w9, 7], {z12.b - z15.b}, z11.b // 11000001-00111011-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x8f,0x35,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b358f + + +usdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00101000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x28,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509028 + +usdot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10010000-00101000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x28,0x90,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509028 + +usdot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00101101 +// CHECK-INST: usdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x2d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d52d + +usdot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11010101-00101101 +// CHECK-INST: usdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x2d,0xd5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155d52d + +usdot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z12.b - 
z15.b }, z8.b[3] +// CHECK-ENCODING: [0xaf,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdaf + +usdot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11111101-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xaf,0xfd,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158fdaf + +usdot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xaf,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffaf + +usdot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11111111-10101111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xaf,0xff,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fffaf + +usdot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x2d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e2d + +usdot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10011110-00101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x2d,0x9e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1509e2d + +usdot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x29,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9429 + +usdot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10010100-00101001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x29,0x94,0x5e,0xc1] +// CHECK-ERROR: instruction requires: 
sme2 +// CHECK-UNKNOWN: c15e9429 + +usdot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00101000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x28,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d628 + +usdot za.s[w10, 0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11010110-00101000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x28,0xd6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154d628 + +usdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10101000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa8,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299a8 + +usdot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10011001-10101000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xa8,0x99,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15299a8 + +usdot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x29,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad829 + +usdot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11011000-00101001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x29,0xd8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ad829 + +usdot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10011010-10101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xad,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9aad + +usdot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 
11000001-01011110-10011010-10101101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xad,0x9a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e9aad + +usdot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x2a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f52a + +usdot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11110101-00101010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x2a,0xf5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151f52a + +usdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10101111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xaf,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9af + +usdot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10111001-10101111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xaf,0xb9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15bb9af + + +usdot za.s[w8, 0, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x14,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11408 + +usdot za.s[w8, 0], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00010100-00001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x08,0x14,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11408 + +usdot za.s[w10, 5, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00001101 +// CHECK-INST: usdot 
za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x0d,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5550d + +usdot za.s[w10, 5], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01010101-00001101 +// CHECK-INST: usdot za.s[w10, 5, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x0d,0x55,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5550d + +usdot za.s[w11, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x8f,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9758f + +usdot za.s[w11, 7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01110101-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x8f,0x75,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9758f + +usdot za.s[w11, 7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x8f,0x77,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd778f + +usdot za.s[w11, 7], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01110111-10001111 +// CHECK-INST: usdot za.s[w11, 7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x8f,0x77,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd778f + +usdot za.s[w8, 5, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00010110-00001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x0d,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b1160d + +usdot za.s[w8, 5], {z16.b - z19.b}, {z16.b - z19.b} // 
11000001-10110001-00010110-00001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x0d,0x16,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b1160d + +usdot za.s[w8, 1, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00001001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1409 + +usdot za.s[w8, 1], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00010100-00001001 +// CHECK-INST: usdot za.s[w8, 1, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x09,0x14,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd1409 + +usdot za.s[w10, 0, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00001000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x56,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55608 + +usdot za.s[w10, 0], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01010110-00001000 +// CHECK-INST: usdot za.s[w10, 0, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x08,0x56,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b55608 + +usdot za.s[w8, 0, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11588 + +usdot za.s[w8, 0], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00010101-10001000 +// CHECK-INST: usdot za.s[w8, 0, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x88,0x15,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a11588 + +usdot za.s[w10, 1, vgx4], {z0.b - z3.b}, 
{z24.b - z27.b} // 11000001-10111001-01010100-00001001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x09,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95409 + +usdot za.s[w10, 1], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01010100-00001001 +// CHECK-INST: usdot za.s[w10, 1, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x09,0x54,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b95409 + +usdot za.s[w8, 5, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x8d,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd168d + +usdot za.s[w8, 5], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00010110-10001101 +// CHECK-INST: usdot za.s[w8, 5, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x8d,0x16,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd168d + +usdot za.s[w11, 2, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00001010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x0a,0x75,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a1750a + +usdot za.s[w11, 2], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01110101-00001010 +// CHECK-INST: usdot za.s[w11, 2, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x0a,0x75,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a1750a + +usdot za.s[w9, 7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x8f,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9358f + +usdot za.s[w9, 7], {z12.b - 
z15.b}, {z8.b - z11.b} // 11000001-10101001-00110101-10001111 +// CHECK-INST: usdot za.s[w9, 7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x8f,0x35,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a9358f + diff --git a/llvm/test/MC/AArch64/SME2/usmlall-diagnostics.s b/llvm/test/MC/AArch64/SME2/usmlall-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/usmlall-diagnostics.s @@ -0,0 +1,84 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +usmlall za.s[w11, 4:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: usmlall za.s[w11, 4:7, vgx2], {z12.h-z14.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w11, 4:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: usmlall za.s[w11, 4:7, vgx4], {z12.h-z17.h}, z8.h[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: usmlall za.s[w10, 4:7], {z8.b-z11.b}, {z21.b-z24.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w10, 4:7], {z8.b-z9.b}, {z21.b-z22.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: usmlall za.s[w10, 4:7], {z8.b-z9.b}, {z21.b-z22.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid indexed-vector register + +usmlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usmlall za.s[w10, 0:3], z19.b, z4.s[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w10, 4:7], z10.b, z30.b[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid restricted vector register, expected z0.b..z15.b +// CHECK-NEXT: usmlall za.s[w10, 4:7], z10.b, z30.b[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +usmlall za.s[w7, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: usmlall za.s[w7, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w12, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: usmlall za.s[w12, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select offset + +usmlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: usmlall za.s[w11, 4:8], {z30.b-z31.b}, z15.b[15] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w8, 5:8, vgx2], {z22.b-z23.b}, z14.b[2] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector select offset must be an immediate range of the form <immf>:<imml>, where the first immediate is a multiple of 4 in the range [0, 4] or [0, 12] depending on the instruction, and the second immediate is immf + 3.
+// CHECK-NEXT: usmlall za.s[w8, 5:8, vgx2], {z22.b-z23.b}, z14.b[2] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +usmlall za.h[w8, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: usmlall za.h[w8, 6:7, vgx2], {z12.b-z13.b}, {z8.b-z9.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector lane index + +usmlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[16] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: usmlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[16] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +usmlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 15]. +// CHECK-NEXT: usmlall za.s[w8, 0:3], {z0.b-z1.b}, z0.b[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/usmlall.s b/llvm/test/MC/AArch64/SME2/usmlall.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/usmlall.s @@ -0,0 +1,1029 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | 
FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +usmlall za.s[w8, 0:3], z0.b, z0.b // 11000001-00100000-00000100-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z0.b, z0.b +// CHECK-ENCODING: [0x04,0x04,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200404 + +usmlall za.s[w10, 4:7], z10.b, z5.b // 11000001-00100101-01000101-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7], z10.b, z5.b +// CHECK-ENCODING: [0x45,0x45,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254545 + +usmlall za.s[w11, 12:15], z13.b, z8.b // 11000001-00101000-01100101-10100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z13.b, z8.b +// CHECK-ENCODING: [0xa7,0x65,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12865a7 + +usmlall za.s[w11, 12:15], z31.b, z15.b // 11000001-00101111-01100111-11100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z31.b, z15.b +// CHECK-ENCODING: [0xe7,0x67,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f67e7 + +usmlall za.s[w8, 4:7], z17.b, z0.b // 11000001-00100000-00000110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z17.b, z0.b +// CHECK-ENCODING: [0x25,0x06,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200625 + +usmlall za.s[w8, 4:7], z1.b, z14.b // 11000001-00101110-00000100-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z1.b, z14.b +// CHECK-ENCODING: [0x25,0x04,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0425 + +usmlall za.s[w10, 0:3], z19.b, z4.b // 11000001-00100100-01000110-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3], z19.b, z4.b +// CHECK-ENCODING: [0x64,0x46,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244664 + +usmlall za.s[w8, 0:3], z12.b, z2.b // 11000001-00100010-00000101-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z12.b, z2.b +// CHECK-ENCODING: [0x84,0x05,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1220584 + +usmlall za.s[w10, 4:7], z1.b, z10.b // 11000001-00101010-01000100-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7], z1.b, z10.b +// CHECK-ENCODING: [0x25,0x44,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4425 + +usmlall za.s[w8, 4:7], z22.b, z14.b // 11000001-00101110-00000110-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7], z22.b, z14.b +// CHECK-ENCODING: [0xc5,0x06,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e06c5 + +usmlall za.s[w11, 8:11], z9.b, z1.b // 11000001-00100001-01100101-00100110 +// CHECK-INST: usmlall za.s[w11, 8:11], z9.b, z1.b +// CHECK-ENCODING: [0x26,0x65,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216526 + +usmlall za.s[w9, 12:15], z12.b, z11.b // 11000001-00101011-00100101-10000111 +// CHECK-INST: usmlall za.s[w9, 12:15], z12.b, z11.b +// CHECK-ENCODING: [0x87,0x25,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2587 + + +usmlall za.s[w8, 0:3], z0.b, z0.b[0] // 11000001-00000000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z0.b, z0.b[0] +// CHECK-ENCODING: [0x04,0x00,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000004 + +usmlall za.s[w10, 4:7], z10.b, z5.b[5] // 11000001-00000101-01010101-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7], z10.b, z5.b[5] +// CHECK-ENCODING: [0x45,0x55,0x05,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1055545 + +usmlall za.s[w11, 12:15], z13.b, z8.b[11] // 11000001-00001000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z13.b, z8.b[11] +// CHECK-ENCODING: [0xa7,0xed,0x08,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c108eda7 + +usmlall za.s[w11, 12:15], z31.b, z15.b[15] // 11000001-00001111-11111111-11100111 +// CHECK-INST: usmlall za.s[w11, 12:15], z31.b, z15.b[15] +// CHECK-ENCODING: [0xe7,0xff,0x0f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c10fffe7 + +usmlall za.s[w8, 4:7], z17.b, z0.b[3] // 11000001-00000000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z17.b, z0.b[3] +// CHECK-ENCODING: [0x25,0x0e,0x00,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1000e25 + +usmlall za.s[w8, 4:7], z1.b, z14.b[9] // 11000001-00001110-10000100-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7], z1.b, z14.b[9] +// CHECK-ENCODING: [0x25,0x84,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e8425 + +usmlall za.s[w10, 0:3], z19.b, z4.b[5] // 11000001-00000100-01010110-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3], z19.b, z4.b[5] +// CHECK-ENCODING: [0x64,0x56,0x04,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1045664 + +usmlall za.s[w8, 0:3], z12.b, z2.b[6] // 11000001-00000010-00011001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3], z12.b, z2.b[6] +// CHECK-ENCODING: [0x84,0x19,0x02,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1021984 + +usmlall za.s[w10, 4:7], z1.b, z10.b[10] // 11000001-00001010-11001000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7], z1.b, z10.b[10] +// CHECK-ENCODING: [0x25,0xc8,0x0a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10ac825 + +usmlall za.s[w8, 4:7], z22.b, z14.b[2] // 11000001-00001110-00001010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7], z22.b, z14.b[2] +// CHECK-ENCODING: [0xc5,0x0a,0x0e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c10e0ac5 + +usmlall za.s[w11, 8:11], z9.b, z1.b[13] // 11000001-00000001-11110101-00100110 +// CHECK-INST: usmlall za.s[w11, 8:11], z9.b, z1.b[13] +// CHECK-ENCODING: [0x26,0xf5,0x01,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c101f526 + +usmlall za.s[w9, 12:15], z12.b, z11.b[10] // 11000001-00001011-10101001-10000111 +// CHECK-INST: usmlall za.s[w9, 12:15], z12.b, z11.b[10] +// CHECK-ENCODING: [0x87,0xa9,0x0b,0xc1] +// CHECK-ERROR: instruction requires: sme2 
+// CHECK-UNKNOWN: c10ba987 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b // 11000001-00100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200004 + +usmlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b // 11000001-00100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200004 + +usmlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b // 11000001-00100101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254145 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b // 11000001-00100101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x25,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1254145 + +usmlall za.s[w11, 4:7, vgx2], {z13.b, z14.b}, z8.b // 11000001-00101000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a5 + +usmlall za.s[w11, 4:7], {z13.b - z14.b}, z8.b // 11000001-00101000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z13.b, z14.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12861a5 + +usmlall za.s[w11, 4:7, vgx2], {z31.b, z0.b}, z15.b // 11000001-00101111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e5 + +usmlall za.s[w11, 4:7], {z31.b - z0.b},
z15.b // 11000001-00101111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z31.b, z0.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x2f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12f63e5 + +usmlall za.s[w8, 4:7, vgx2], {z17.b, z18.b}, z0.b // 11000001-00100000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200225 + +usmlall za.s[w8, 4:7], {z17.b - z18.b}, z0.b // 11000001-00100000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z17.b, z18.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1200225 + +usmlall za.s[w8, 4:7, vgx2], {z1.b, z2.b}, z14.b // 11000001-00101110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0025 + +usmlall za.s[w8, 4:7], {z1.b - z2.b}, z14.b // 11000001-00101110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e0025 + +usmlall za.s[w10, 0:3, vgx2], {z19.b, z20.b}, z4.b // 11000001-00100100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244264 + +usmlall za.s[w10, 0:3], {z19.b - z20.b}, z4.b // 11000001-00100100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z19.b, z20.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x24,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1244264 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b // 11000001-00100010-00000001-10000100 +// CHECK-INST: usmlall
za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220184 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b // 11000001-00100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x22,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1220184 + +usmlall za.s[w10, 4:7, vgx2], {z1.b, z2.b}, z10.b // 11000001-00101010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4025 + +usmlall za.s[w10, 4:7], {z1.b - z2.b}, z10.b // 11000001-00101010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z1.b, z2.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x2a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12a4025 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b // 11000001-00101110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c5 + +usmlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b // 11000001-00101110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x2e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12e02c5 + +usmlall za.s[w11, 0:3, vgx2], {z9.b, z10.b}, z1.b // 11000001-00100001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216124 + +usmlall za.s[w11, 0:3], {z9.b - z10.b}, z1.b // 11000001-00100001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z9.b, z10.b }, z1.b +// CHECK-ENCODING:
[0x24,0x61,0x21,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1216124 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b // 11000001-00101011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2185 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b // 11000001-00101011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x2b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c12b2185 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, z0.b[0] // 11000001-00010000-00000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100020 + +usmlall za.s[w8, 0:3], {z0.b - z1.b}, z0.b[0] // 11000001-00010000-00000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x00,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100020 + +usmlall za.s[w10, 4:7, vgx2], {z10.b, z11.b}, z5.b[6] // 11000001-00010101-01000101-01100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x65,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154565 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, z5.b[6] // 11000001-00010101-01000101-01100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, z5.b[6] +// CHECK-ENCODING: [0x65,0x45,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1154565 + +usmlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, z8.b[15] // 11000001-00011000-01101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0x6d,0x18,0xc1] +// CHECK-ERROR:
instruction requires: sme2 +// CHECK-UNKNOWN: c1186da7 + +usmlall za.s[w11, 4:7], {z12.b - z13.b}, z8.b[15] // 11000001-00011000-01101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0x6d,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1186da7 + +usmlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, z15.b[15] // 11000001-00011111-01101111-11100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xe7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fe7 + +usmlall za.s[w11, 4:7], {z30.b - z31.b}, z15.b[15] // 11000001-00011111-01101111-11100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, z15.b[15] +// CHECK-ENCODING: [0xe7,0x6f,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11f6fe7 + +usmlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, z0.b[14] // 11000001-00010000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e25 + +usmlall za.s[w8, 4:7], {z16.b - z17.b}, z0.b[14] // 11000001-00010000-00001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x0e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1100e25 + +usmlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, z14.b[4] // 11000001-00011110-00000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0421 + +usmlall za.s[w8, 4:7], {z0.b - z1.b}, z14.b[4] // 11000001-00011110-00000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x04,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2
+// CHECK-UNKNOWN: c11e0421 + +usmlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, z4.b[4] // 11000001-00010100-01000110-01100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x60,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144660 + +usmlall za.s[w10, 0:3], {z18.b - z19.b}, z4.b[4] // 11000001-00010100-01000110-01100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, z4.b[4] +// CHECK-ENCODING: [0x60,0x46,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1144660 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, z2.b[8] // 11000001-00010010-00001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11209a0 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, z2.b[8] // 11000001-00010010-00001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x09,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11209a0 + +usmlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, z10.b[8] // 11000001-00011010-01001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4821 + +usmlall za.s[w10, 4:7], {z0.b - z1.b}, z10.b[8] // 11000001-00011010-01001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0x48,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11a4821 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, z14.b[10] // 11000001-00011110-00001010-11100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xe5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ae5
+ +usmlall za.s[w8, 4:7], {z22.b - z23.b}, z14.b[10] // 11000001-00011110-00001010-11100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, z14.b[10] +// CHECK-ENCODING: [0xe5,0x0a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e0ae5 + +usmlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, z1.b[5] // 11000001-00010001-01100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116522 + +usmlall za.s[w11, 0:3], {z8.b - z9.b}, z1.b[5] // 11000001-00010001-01100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0x65,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1116522 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, z11.b[11] // 11000001-00011011-00101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b29a7 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, z11.b[11] // 11000001-00011011-00101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0x29,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11b29a7 + + +usmlall za.s[w8, 0:3, vgx2], {z0.b, z1.b}, {z0.b, z1.b} // 11000001-10100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00004 + +usmlall za.s[w8, 0:3], {z0.b - z1.b}, {z0.b - z1.b} // 11000001-10100000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z0.b, z1.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x00,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a00004 + +usmlall
za.s[w10, 4:7, vgx2], {z10.b, z11.b}, {z20.b, z21.b} // 11000001-10110100-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44145 + +usmlall za.s[w10, 4:7], {z10.b - z11.b}, {z20.b - z21.b} // 11000001-10110100-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z10.b, z11.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x45,0x41,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44145 + +usmlall za.s[w11, 4:7, vgx2], {z12.b, z13.b}, {z8.b, z9.b} // 11000001-10101000-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x85,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86185 + +usmlall za.s[w11, 4:7], {z12.b - z13.b}, {z8.b - z9.b} // 11000001-10101000-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z12.b, z13.b }, { z8.b, z9.b } +// CHECK-ENCODING: [0x85,0x61,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a86185 + +usmlall za.s[w11, 4:7, vgx2], {z30.b, z31.b}, {z30.b, z31.b} // 11000001-10111110-01100011-11000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c5 + +usmlall za.s[w11, 4:7], {z30.b - z31.b}, {z30.b - z31.b} // 11000001-10111110-01100011-11000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx2], { z30.b, z31.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x63,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be63c5 + +usmlall za.s[w8, 4:7, vgx2], {z16.b, z17.b}, {z16.b, z17.b} // 11000001-10110000-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0xb0,0xc1]
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00205 + +usmlall za.s[w8, 4:7], {z16.b - z17.b}, {z16.b - z17.b} // 11000001-10110000-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z16.b, z17.b }, { z16.b, z17.b } +// CHECK-ENCODING: [0x05,0x02,0xb0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b00205 + +usmlall za.s[w8, 4:7, vgx2], {z0.b, z1.b}, {z30.b, z31.b} // 11000001, 10111110, 00000000, 00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0005 + +usmlall za.s[w8, 4:7], {z0.b - z1.b}, {z30.b - z31.b} // 11000001-10111110-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z0.b, z1.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be0005 + +usmlall za.s[w10, 0:3, vgx2], {z18.b, z19.b}, {z20.b, z21.b} // 11000001, 10110100, 01000010, 01000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x44,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44244 + +usmlall za.s[w10, 0:3], {z18.b - z19.b}, {z20.b - z21.b} // 11000001-10110100-01000010-01000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx2], { z18.b, z19.b }, { z20.b, z21.b } +// CHECK-ENCODING: [0x44,0x42,0xb4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b44244 + +usmlall za.s[w8, 0:3, vgx2], {z12.b, z13.b}, {z2.b, z3.b} // 11000001, 10100010, 00000001, 10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b }, { z2.b, z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20184 + +usmlall za.s[w8, 0:3], {z12.b - z13.b}, {z2.b - z3.b} // 11000001-10100010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx2], { z12.b, z13.b
}, { z2.b, z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a20184 + +usmlall za.s[w10, 4:7, vgx2], {z0.b, z1.b}, {z26.b, z27.b} // 11000001, 10111010, 01000000, 00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x05,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4005 + +usmlall za.s[w10, 4:7], {z0.b - z1.b}, {z26.b - z27.b} // 11000001-10111010-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx2], { z0.b, z1.b }, { z26.b, z27.b } +// CHECK-ENCODING: [0x05,0x40,0xba,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ba4005 + +usmlall za.s[w8, 4:7, vgx2], {z22.b, z23.b}, {z30.b, z31.b} // 11000001, 10111110, 00000010, 11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c5 + +usmlall za.s[w8, 4:7], {z22.b - z23.b}, {z30.b - z31.b} // 11000001-10111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx2], { z22.b, z23.b }, { z30.b, z31.b } +// CHECK-ENCODING: [0xc5,0x02,0xbe,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1be02c5 + +usmlall za.s[w11, 0:3, vgx2], {z8.b, z9.b}, {z0.b, z1.b} // 11000001, 10100000, 01100001, 00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06104 + +usmlall za.s[w11, 0:3], {z8.b - z9.b}, {z0.b - z1.b} // 11000001-10100000-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx2], { z8.b, z9.b }, { z0.b, z1.b } +// CHECK-ENCODING: [0x04,0x61,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a06104 + +usmlall za.s[w9, 4:7, vgx2], {z12.b, z13.b}, {z10.b, z11.b} // 11000001, 10101010, 00100001, 10000101
+// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x85,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2185 + +usmlall za.s[w9, 4:7], {z12.b - z13.b}, {z10.b - z11.b} // 11000001-10101010-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx2], { z12.b, z13.b }, { z10.b, z11.b } +// CHECK-ENCODING: [0x85,0x21,0xaa,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1aa2185 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300004 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b // 11000001-00110000-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b +// CHECK-ENCODING: [0x04,0x00,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300004 + +usmlall za.s[w10, 4:7, vgx4], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354145 + +usmlall za.s[w10, 4:7], {z10.b - z13.b}, z5.b // 11000001-00110101-01000001-01000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z10.b - z13.b }, z5.b +// CHECK-ENCODING: [0x45,0x41,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1354145 + +usmlall za.s[w11, 4:7, vgx4], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z13.b - z16.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a5 + +usmlall za.s[w11, 4:7], {z13.b - z16.b}, z8.b // 11000001-00111000-01100001-10100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z13.b -
z16.b }, z8.b +// CHECK-ENCODING: [0xa5,0x61,0x38,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13861a5 + +usmlall za.s[w11, 4:7, vgx4], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e5 + +usmlall za.s[w11, 4:7], {z31.b, z0.b, z1.b, z2.b}, z15.b // 11000001-00111111-01100011-11100101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z31.b, z0.b, z1.b, z2.b }, z15.b +// CHECK-ENCODING: [0xe5,0x63,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13f63e5 + +usmlall za.s[w8, 4:7, vgx4], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300225 + +usmlall za.s[w8, 4:7], {z17.b - z20.b}, z0.b // 11000001-00110000-00000010-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z17.b - z20.b }, z0.b +// CHECK-ENCODING: [0x25,0x02,0x30,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1300225 + +usmlall za.s[w8, 4:7, vgx4], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0025 + +usmlall za.s[w8, 4:7], {z1.b - z4.b}, z14.b // 11000001-00111110-00000000-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z1.b - z4.b }, z14.b +// CHECK-ENCODING: [0x25,0x00,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e0025 + +usmlall za.s[w10, 0:3, vgx4], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: 
[0x64,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344264 + +usmlall za.s[w10, 0:3], {z19.b - z22.b}, z4.b // 11000001-00110100-01000010-01100100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z19.b - z22.b }, z4.b +// CHECK-ENCODING: [0x64,0x42,0x34,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1344264 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320184 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b // 11000001-00110010-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b +// CHECK-ENCODING: [0x84,0x01,0x32,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1320184 + +usmlall za.s[w10, 4:7, vgx4], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4025 + +usmlall za.s[w10, 4:7], {z1.b - z4.b}, z10.b // 11000001-00111010-01000000-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z1.b - z4.b }, z10.b +// CHECK-ENCODING: [0x25,0x40,0x3a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13a4025 + +usmlall za.s[w8, 4:7, vgx4], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13e02c5 + +usmlall za.s[w8, 4:7], {z22.b - z25.b}, z14.b // 11000001-00111110-00000010-11000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z22.b - z25.b }, z14.b +// CHECK-ENCODING: [0xc5,0x02,0x3e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c13e02c5 + +usmlall za.s[w11, 0:3, vgx4], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316124 + +usmlall za.s[w11, 0:3], {z9.b - z12.b}, z1.b // 11000001-00110001-01100001-00100100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z9.b - z12.b }, z1.b +// CHECK-ENCODING: [0x24,0x61,0x31,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1316124 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2185 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b // 11000001-00111011-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b +// CHECK-ENCODING: [0x85,0x21,0x3b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13b2185 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108020 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, z0.b[0] // 11000001-00010000-10000000-00100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x20,0x80,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108020 + +usmlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, z5.b[6] // 11000001-00010101-11000101-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x25,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c525 + +usmlall za.s[w10, 4:7], {z8.b - z11.b}, z5.b[6] // 
11000001-00010101-11000101-00100101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, z5.b[6] +// CHECK-ENCODING: [0x25,0xc5,0x15,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c115c525 + +usmlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118eda7 + +usmlall za.s[w11, 4:7], {z12.b - z15.b}, z8.b[15] // 11000001-00011000-11101101-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, z8.b[15] +// CHECK-ENCODING: [0xa7,0xed,0x18,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c118eda7 + +usmlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0xa7,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fefa7 + +usmlall za.s[w11, 4:7], {z28.b - z31.b}, z15.b[15] // 11000001-00011111-11101111-10100111 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, z15.b[15] +// CHECK-ENCODING: [0xa7,0xef,0x1f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11fefa7 + +usmlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e25 + +usmlall za.s[w8, 4:7], {z16.b - z19.b}, z0.b[14] // 11000001-00010000-10001110-00100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, z0.b[14] +// CHECK-ENCODING: [0x25,0x8e,0x10,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1108e25 + +usmlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, z14.b[4] // 
11000001-00011110-10000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8421 + +usmlall za.s[w8, 4:7], {z0.b - z3.b}, z14.b[4] // 11000001-00011110-10000100-00100001 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, z14.b[4] +// CHECK-ENCODING: [0x21,0x84,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8421 + +usmlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x20,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c620 + +usmlall za.s[w10, 0:3], {z16.b - z19.b}, z4.b[4] // 11000001-00010100-11000110-00100000 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, z4.b[4] +// CHECK-ENCODING: [0x20,0xc6,0x14,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c114c620 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11289a0 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, z2.b[8] // 11000001-00010010-10001001-10100000 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, z2.b[8] +// CHECK-ENCODING: [0xa0,0x89,0x12,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11289a0 + +usmlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00100001 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac821 + +usmlall za.s[w10, 4:7], {z0.b - z3.b}, z10.b[8] // 11000001-00011010-11001000-00100001 +// 
CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, z10.b[8] +// CHECK-ENCODING: [0x21,0xc8,0x1a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ac821 + +usmlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0xa5,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8aa5 + +usmlall za.s[w8, 4:7], {z20.b - z23.b}, z14.b[10] // 11000001-00011110-10001010-10100101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, z14.b[10] +// CHECK-ENCODING: [0xa5,0x8a,0x1e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11e8aa5 + +usmlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e522 + +usmlall za.s[w11, 0:3], {z8.b - z11.b}, z1.b[5] // 11000001-00010001-11100101-00100010 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, z1.b[5] +// CHECK-ENCODING: [0x22,0xe5,0x11,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c111e522 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba9a7 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, z11.b[11] // 11000001-00011011-10101001-10100111 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, z11.b[11] +// CHECK-ENCODING: [0xa7,0xa9,0x1b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c11ba9a7 + + +usmlall za.s[w8, 0:3, vgx4], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000100 +// CHECK-INST: usmlall 
za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10004 + +usmlall za.s[w8, 0:3], {z0.b - z3.b}, {z0.b - z3.b} // 11000001-10100001-00000000-00000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x00,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10004 + +usmlall za.s[w10, 4:7, vgx4], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x05,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54105 + +usmlall za.s[w10, 4:7], {z8.b - z11.b}, {z20.b - z23.b} // 11000001-10110101-01000001-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z8.b - z11.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x05,0x41,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54105 + +usmlall za.s[w11, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96185 + +usmlall za.s[w11, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-01100001-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x61,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a96185 + +usmlall za.s[w11, 4:7, vgx4], {z28.b - z31.b}, {z28.b - z31.b} // 11000001-10111101-01100011-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6385 + +usmlall za.s[w11, 4:7], {z28.b - z31.b}, {z28.b - 
z31.b} // 11000001-10111101-01100011-10000101 +// CHECK-INST: usmlall za.s[w11, 4:7, vgx4], { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x63,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd6385 + +usmlall za.s[w8, 4:7, vgx4], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x05,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10205 + +usmlall za.s[w8, 4:7], {z16.b - z19.b}, {z16.b - z19.b} // 11000001-10110001-00000010-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z16.b - z19.b }, { z16.b - z19.b } +// CHECK-ENCODING: [0x05,0x02,0xb1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b10205 + +usmlall za.s[w8, 4:7, vgx4], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0005 + +usmlall za.s[w8, 4:7], {z0.b - z3.b}, {z28.b - z31.b} // 11000001-10111101-00000000-00000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z0.b - z3.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x05,0x00,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0005 + +usmlall za.s[w10, 0:3, vgx4], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x04,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b54204 + +usmlall za.s[w10, 0:3], {z16.b - z19.b}, {z20.b - z23.b} // 11000001-10110101-01000010-00000100 +// CHECK-INST: usmlall za.s[w10, 0:3, vgx4], { z16.b - z19.b }, { z20.b - z23.b } +// CHECK-ENCODING: [0x04,0x42,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1b54204 + +usmlall za.s[w8, 0:3, vgx4], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10184 + +usmlall za.s[w8, 0:3], {z12.b - z15.b}, {z0.b - z3.b} // 11000001-10100001-00000001-10000100 +// CHECK-INST: usmlall za.s[w8, 0:3, vgx4], { z12.b - z15.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x84,0x01,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a10184 + +usmlall za.s[w10, 4:7, vgx4], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x05,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94005 + +usmlall za.s[w10, 4:7], {z0.b - z3.b}, {z24.b - z27.b} // 11000001-10111001-01000000-00000101 +// CHECK-INST: usmlall za.s[w10, 4:7, vgx4], { z0.b - z3.b }, { z24.b - z27.b } +// CHECK-ENCODING: [0x05,0x40,0xb9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b94005 + +usmlall za.s[w8, 4:7, vgx4], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0285 + +usmlall za.s[w8, 4:7], {z20.b - z23.b}, {z28.b - z31.b} // 11000001-10111101-00000010-10000101 +// CHECK-INST: usmlall za.s[w8, 4:7, vgx4], { z20.b - z23.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x85,0x02,0xbd,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bd0285 + +usmlall za.s[w11, 0:3, vgx4], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: 
[0x04,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16104 + +usmlall za.s[w11, 0:3], {z8.b - z11.b}, {z0.b - z3.b} // 11000001-10100001-01100001-00000100 +// CHECK-INST: usmlall za.s[w11, 0:3, vgx4], { z8.b - z11.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x04,0x61,0xa1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a16104 + +usmlall za.s[w9, 4:7, vgx4], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92185 + +usmlall za.s[w9, 4:7], {z12.b - z15.b}, {z8.b - z11.b} // 11000001-10101001-00100001-10000101 +// CHECK-INST: usmlall za.s[w9, 4:7, vgx4], { z12.b - z15.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x85,0x21,0xa9,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a92185 + diff --git a/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk-diagnostics.s @@ -0,0 +1,32 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uunpk {z0.h-z2.h}, z0.b +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.h-z2.h}, z0.b +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z1.s-z2.s}, z0.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uunpk {z1.s-z2.s}, z0.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.d-z5.d}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uunpk {z0.d-z5.d}, 
{z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z9.h-z11.h} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Register Suffix + +uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uunpk {z0.s-z3.s}, {z8.s-z9.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uunpk.s b/llvm/test/MC/AArch64/SME2/uunpk.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uunpk.s @@ -0,0 +1,163 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uunpk {z0.h - z1.h}, z0.b // 11000001-01100101-11100000-00000001 +// CHECK-INST: uunpk { z0.h, z1.h }, z0.b +// CHECK-ENCODING: [0x01,0xe0,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e001 + +uunpk {z20.h - z21.h}, z10.b // 11000001-01100101-11100001-01010101 +// CHECK-INST: uunpk { z20.h, z21.h }, z10.b +// CHECK-ENCODING: [0x55,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e155 + 
+uunpk {z22.h - z23.h}, z13.b // 11000001-01100101-11100001-10110111 +// CHECK-INST: uunpk { z22.h, z23.h }, z13.b +// CHECK-ENCODING: [0xb7,0xe1,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e1b7 + +uunpk {z30.h - z31.h}, z31.b // 11000001-01100101-11100011-11111111 +// CHECK-INST: uunpk { z30.h, z31.h }, z31.b +// CHECK-ENCODING: [0xff,0xe3,0x65,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c165e3ff + + +uunpk {z0.s - z1.s}, z0.h // 11000001-10100101-11100000-00000001 +// CHECK-INST: uunpk { z0.s, z1.s }, z0.h +// CHECK-ENCODING: [0x01,0xe0,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e001 + +uunpk {z20.s - z21.s}, z10.h // 11000001-10100101-11100001-01010101 +// CHECK-INST: uunpk { z20.s, z21.s }, z10.h +// CHECK-ENCODING: [0x55,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e155 + +uunpk {z22.s - z23.s}, z13.h // 11000001-10100101-11100001-10110111 +// CHECK-INST: uunpk { z22.s, z23.s }, z13.h +// CHECK-ENCODING: [0xb7,0xe1,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e1b7 + +uunpk {z30.s - z31.s}, z31.h // 11000001-10100101-11100011-11111111 +// CHECK-INST: uunpk { z30.s, z31.s }, z31.h +// CHECK-ENCODING: [0xff,0xe3,0xa5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a5e3ff + + +uunpk {z0.d - z1.d}, z0.s // 11000001-11100101-11100000-00000001 +// CHECK-INST: uunpk { z0.d, z1.d }, z0.s +// CHECK-ENCODING: [0x01,0xe0,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e001 + +uunpk {z20.d - z21.d}, z10.s // 11000001-11100101-11100001-01010101 +// CHECK-INST: uunpk { z20.d, z21.d }, z10.s +// CHECK-ENCODING: [0x55,0xe1,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e155 + +uunpk {z22.d - z23.d}, z13.s // 11000001-11100101-11100001-10110111 +// CHECK-INST: uunpk { z22.d, z23.d }, z13.s +// CHECK-ENCODING: [0xb7,0xe1,0xe5,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e1b7 + +uunpk {z30.d - z31.d}, z31.s // 11000001-11100101-11100011-11111111 +// CHECK-INST: uunpk { z30.d, z31.d }, z31.s +// CHECK-ENCODING: [0xff,0xe3,0xe5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e5e3ff + + +uunpk {z0.h - z3.h}, {z0.b - z1.b} // 11000001-01110101-11100000-00000001 +// CHECK-INST: uunpk { z0.h - z3.h }, { z0.b, z1.b } +// CHECK-ENCODING: [0x01,0xe0,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e001 + +uunpk {z20.h - z23.h}, {z10.b - z11.b} // 11000001-01110101-11100001-01010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z10.b, z11.b } +// CHECK-ENCODING: [0x55,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e155 + +uunpk {z20.h - z23.h}, {z12.b - z13.b} // 11000001-01110101-11100001-10010101 +// CHECK-INST: uunpk { z20.h - z23.h }, { z12.b, z13.b } +// CHECK-ENCODING: [0x95,0xe1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e195 + +uunpk {z28.h - z31.h}, {z30.b - z31.b} // 11000001-01110101-11100011-11011101 +// CHECK-INST: uunpk { z28.h - z31.h }, { z30.b, z31.b } +// CHECK-ENCODING: [0xdd,0xe3,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175e3dd + + +uunpk {z0.s - z3.s}, {z0.h - z1.h} // 11000001-10110101-11100000-00000001 +// CHECK-INST: uunpk { z0.s - z3.s }, { z0.h, z1.h } +// CHECK-ENCODING: [0x01,0xe0,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e001 + +uunpk {z20.s - z23.s}, {z10.h - z11.h} // 11000001-10110101-11100001-01010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z10.h, z11.h } +// CHECK-ENCODING: [0x55,0xe1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e155 + +uunpk {z20.s - z23.s}, {z12.h - z13.h} // 11000001-10110101-11100001-10010101 +// CHECK-INST: uunpk { z20.s - z23.s }, { z12.h, z13.h } +// CHECK-ENCODING: [0x95,0xe1,0xb5,0xc1] +// 
CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e195 + +uunpk {z28.s - z31.s}, {z30.h - z31.h} // 11000001-10110101-11100011-11011101 +// CHECK-INST: uunpk { z28.s - z31.s }, { z30.h, z31.h } +// CHECK-ENCODING: [0xdd,0xe3,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5e3dd + + +uunpk {z0.d - z3.d}, {z0.s - z1.s} // 11000001-11110101-11100000-00000001 +// CHECK-INST: uunpk { z0.d - z3.d }, { z0.s, z1.s } +// CHECK-ENCODING: [0x01,0xe0,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e001 + +uunpk {z20.d - z23.d}, {z10.s - z11.s} // 11000001-11110101-11100001-01010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z10.s, z11.s } +// CHECK-ENCODING: [0x55,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e155 + +uunpk {z20.d - z23.d}, {z12.s - z13.s} // 11000001-11110101-11100001-10010101 +// CHECK-INST: uunpk { z20.d - z23.d }, { z12.s, z13.s } +// CHECK-ENCODING: [0x95,0xe1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e195 + +uunpk {z28.d - z31.d}, {z30.s - z31.s} // 11000001-11110101-11100011-11011101 +// CHECK-INST: uunpk { z28.d - z31.d }, { z30.s, z31.s } +// CHECK-ENCODING: [0xdd,0xe3,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5e3dd + diff --git a/llvm/test/MC/AArch64/SME2/uvdot-diagnostics.s b/llvm/test/MC/AArch64/SME2/uvdot-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uvdot-diagnostics.s @@ -0,0 +1,69 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Out of range index offset + +uvdot za.s[w8, 8, vgx4], {z0.b-z3.b}, z0.b[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. 
+// CHECK-NEXT: uvdot za.s[w8, 8, vgx4], {z0.b-z3.b}, z0.b[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uvdot za.s[w8, -1, vgx2], {z0.b-z1.b}, z0.b[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: immediate must be an integer in range [0, 7]. +// CHECK-NEXT: uvdot za.s[w8, -1, vgx2], {z0.b-z1.b}, z0.b[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector select register + +uvdot za.s[w7, 7, vgx2], {z0.h-z1.h}, z0.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: operand must be a register in range [w8, w11] +// CHECK-NEXT: uvdot za.s[w7, 7, vgx2], {z0.h-z1.h}, z0.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uvdot za.d[w12, 7, vgx4], {z0.h-z3.h}, z0.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: uvdot za.d[w12, 7, vgx4], {z0.h-z3.h}, z0.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid vector list + +uvdot za.d[w8, 0, vgx4], {z0.h-z4.h}, z0.h[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uvdot za.d[w8, 0, vgx4], {z0.h-z4.h}, z0.h[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uvdot za.s[w8, 0, vgx4], {z1.b-z4.b}, z0.b[1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element type +// CHECK-NEXT: uvdot za.s[w8, 0, vgx4], {z1.b-z4.b}, z0.b[1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid Matrix Operand + +uvdot za.b[w8, 0, vgx4], {z0.h-z3.h}, z4.h[7] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid matrix operand, expected suffix .s +// CHECK-NEXT: uvdot za.b[w8, 0, vgx4], {z0.h-z3.h}, z4.h[7] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid vector grouping + +uvdot za.s[w8, 0, vgx2], {z0.b-z3.b}, z14.b[3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: uvdot za.s[w8, 0, vgx2], {z0.b-z3.b}, z14.b[3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid lane index + +uvdot za.s[w11, 7], {z30.h-z31.h}, z15.h[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: uvdot za.s[w11, 7], {z30.h-z31.h}, z15.h[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uvdot za.s[w11, 7], {z30.h-z31.h}, z15.h[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3] +// CHECK-NEXT: uvdot za.s[w11, 7], {z30.h-z31.h}, z15.h[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SME2/uvdot.s b/llvm/test/MC/AArch64/SME2/uvdot.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uvdot.s @@ -0,0 +1,448 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2,+sme-i16i64 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2,+sme-i16i64 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2,+sme-i16i64 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uvdot za.s[w8, 0, vgx2], {z0.h, 
z1.h}, z0.h[0] // 11000001-01010000-00000000-00110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x30,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500030 + +uvdot za.s[w8, 0], {z0.h, z1.h}, z0.h[0] // 11000001-01010000-00000000-00110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z0.h[0] +// CHECK-ENCODING: [0x30,0x00,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500030 + +uvdot za.s[w10, 5, vgx2], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01110101 +// CHECK-INST: uvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x75,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554575 + +uvdot za.s[w10, 5], {z10.h, z11.h}, z5.h[1] // 11000001-01010101-01000101-01110101 +// CHECK-INST: uvdot za.s[w10, 5, vgx2], { z10.h, z11.h }, z5.h[1] +// CHECK-ENCODING: [0x75,0x45,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1554575 + +uvdot za.s[w11, 7, vgx2], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0xb7,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586db7 + +uvdot za.s[w11, 7], {z12.h, z13.h}, z8.h[3] // 11000001-01011000-01101101-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx2], { z12.h, z13.h }, z8.h[3] +// CHECK-ENCODING: [0xb7,0x6d,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1586db7 + +uvdot za.s[w11, 7, vgx2], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx2], { z30.h, z31.h }, z15.h[3] +// CHECK-ENCODING: [0xf7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6ff7 + +uvdot za.s[w11, 7], {z30.h, z31.h}, z15.h[3] // 11000001-01011111-01101111-11110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx2], { z30.h, z31.h 
}, z15.h[3] +// CHECK-ENCODING: [0xf7,0x6f,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15f6ff7 + +uvdot za.s[w8, 5, vgx2], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x35,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e35 + +uvdot za.s[w8, 5], {z16.h, z17.h}, z0.h[3] // 11000001-01010000-00001110-00110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx2], { z16.h, z17.h }, z0.h[3] +// CHECK-ENCODING: [0x35,0x0e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1500e35 + +uvdot za.s[w8, 1, vgx2], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00110001 +// CHECK-INST: uvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x31,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0431 + +uvdot za.s[w8, 1], {z0.h, z1.h}, z14.h[1] // 11000001-01011110-00000100-00110001 +// CHECK-INST: uvdot za.s[w8, 1, vgx2], { z0.h, z1.h }, z14.h[1] +// CHECK-ENCODING: [0x31,0x04,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0431 + +uvdot za.s[w10, 0, vgx2], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01110000 +// CHECK-INST: uvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x70,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544670 + +uvdot za.s[w10, 0], {z18.h, z19.h}, z4.h[1] // 11000001-01010100-01000110-01110000 +// CHECK-INST: uvdot za.s[w10, 0, vgx2], { z18.h, z19.h }, z4.h[1] +// CHECK-ENCODING: [0x70,0x46,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1544670 + +uvdot za.s[w8, 0, vgx2], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0xb0,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c15209b0 + +uvdot za.s[w8, 0], {z12.h, z13.h}, z2.h[2] // 11000001-01010010-00001001-10110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx2], { z12.h, z13.h }, z2.h[2] +// CHECK-ENCODING: [0xb0,0x09,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15209b0 + +uvdot za.s[w10, 1, vgx2], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00110001 +// CHECK-INST: uvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x31,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4831 + +uvdot za.s[w10, 1], {z0.h, z1.h}, z10.h[2] // 11000001-01011010-01001000-00110001 +// CHECK-INST: uvdot za.s[w10, 1, vgx2], { z0.h, z1.h }, z10.h[2] +// CHECK-ENCODING: [0x31,0x48,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15a4831 + +uvdot za.s[w8, 5, vgx2], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xf5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0af5 + +uvdot za.s[w8, 5], {z22.h, z23.h}, z14.h[2] // 11000001-01011110-00001010-11110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx2], { z22.h, z23.h }, z14.h[2] +// CHECK-ENCODING: [0xf5,0x0a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e0af5 + +uvdot za.s[w11, 2, vgx2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00110010 +// CHECK-INST: uvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x32,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516532 + +uvdot za.s[w11, 2], {z8.h, z9.h}, z1.h[1] // 11000001-01010001-01100101-00110010 +// CHECK-INST: uvdot za.s[w11, 2, vgx2], { z8.h, z9.h }, z1.h[1] +// CHECK-ENCODING: [0x32,0x65,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1516532 + +uvdot za.s[w9, 7, vgx2], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10110111 +// 
CHECK-INST: uvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0xb7,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b29b7 + +uvdot za.s[w9, 7], {z12.h, z13.h}, z11.h[2] // 11000001-01011011-00101001-10110111 +// CHECK-INST: uvdot za.s[w9, 7, vgx2], { z12.h, z13.h }, z11.h[2] +// CHECK-ENCODING: [0xb7,0x29,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15b29b7 + + +uvdot za.s[w8, 0, vgx4], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10000000-00110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508030 + +uvdot za.s[w8, 0], {z0.b - z3.b}, z0.b[0] // 11000001-01010000-10000000-00110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx4], { z0.b - z3.b }, z0.b[0] +// CHECK-ENCODING: [0x30,0x80,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508030 + +uvdot za.s[w10, 5, vgx4], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00110101 +// CHECK-INST: uvdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x35,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c535 + +uvdot za.s[w10, 5], {z8.b - z11.b}, z5.b[1] // 11000001-01010101-11000101-00110101 +// CHECK-INST: uvdot za.s[w10, 5, vgx4], { z8.b - z11.b }, z5.b[1] +// CHECK-ENCODING: [0x35,0xc5,0x55,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c155c535 + +uvdot za.s[w11, 7, vgx4], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0xed,0x58,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158edb7 + +uvdot za.s[w11, 7], {z12.b - z15.b}, z8.b[3] // 11000001-01011000-11101101-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx4], { z12.b - z15.b }, z8.b[3] +// CHECK-ENCODING: [0xb7,0xed,0x58,0xc1] 
+// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c158edb7 + +uvdot za.s[w11, 7, vgx4], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xb7,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefb7 + +uvdot za.s[w11, 7], {z28.b - z31.b}, z15.b[3] // 11000001-01011111-11101111-10110111 +// CHECK-INST: uvdot za.s[w11, 7, vgx4], { z28.b - z31.b }, z15.b[3] +// CHECK-ENCODING: [0xb7,0xef,0x5f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15fefb7 + +uvdot za.s[w8, 5, vgx4], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e35 + +uvdot za.s[w8, 5], {z16.b - z19.b}, z0.b[3] // 11000001-01010000-10001110-00110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx4], { z16.b - z19.b }, z0.b[3] +// CHECK-ENCODING: [0x35,0x8e,0x50,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1508e35 + +uvdot za.s[w8, 1, vgx4], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00110001 +// CHECK-INST: uvdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8431 + +uvdot za.s[w8, 1], {z0.b - z3.b}, z14.b[1] // 11000001-01011110-10000100-00110001 +// CHECK-INST: uvdot za.s[w8, 1, vgx4], { z0.b - z3.b }, z14.b[1] +// CHECK-ENCODING: [0x31,0x84,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8431 + +uvdot za.s[w10, 0, vgx4], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00110000 +// CHECK-INST: uvdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x30,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c630 + +uvdot za.s[w10, 
0], {z16.b - z19.b}, z4.b[1] // 11000001-01010100-11000110-00110000 +// CHECK-INST: uvdot za.s[w10, 0, vgx4], { z16.b - z19.b }, z4.b[1] +// CHECK-ENCODING: [0x30,0xc6,0x54,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c154c630 + +uvdot za.s[w8, 0, vgx4], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289b0 + +uvdot za.s[w8, 0], {z12.b - z15.b}, z2.b[2] // 11000001-01010010-10001001-10110000 +// CHECK-INST: uvdot za.s[w8, 0, vgx4], { z12.b - z15.b }, z2.b[2] +// CHECK-ENCODING: [0xb0,0x89,0x52,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15289b0 + +uvdot za.s[w10, 1, vgx4], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00110001 +// CHECK-INST: uvdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac831 + +uvdot za.s[w10, 1], {z0.b - z3.b}, z10.b[2] // 11000001-01011010-11001000-00110001 +// CHECK-INST: uvdot za.s[w10, 1, vgx4], { z0.b - z3.b }, z10.b[2] +// CHECK-ENCODING: [0x31,0xc8,0x5a,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ac831 + +uvdot za.s[w8, 5, vgx4], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xb5,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8ab5 + +uvdot za.s[w8, 5], {z20.b - z23.b}, z14.b[2] // 11000001-01011110-10001010-10110101 +// CHECK-INST: uvdot za.s[w8, 5, vgx4], { z20.b - z23.b }, z14.b[2] +// CHECK-ENCODING: [0xb5,0x8a,0x5e,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15e8ab5 + +uvdot za.s[w11, 2, vgx4], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00110010 +// CHECK-INST: uvdot 
za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e532 + +uvdot za.s[w11, 2], {z8.b - z11.b}, z1.b[1] // 11000001-01010001-11100101-00110010 +// CHECK-INST: uvdot za.s[w11, 2, vgx4], { z8.b - z11.b }, z1.b[1] +// CHECK-ENCODING: [0x32,0xe5,0x51,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c151e532 + +uvdot za.s[w9, 7, vgx4], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10110111 +// CHECK-INST: uvdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9b7 + +uvdot za.s[w9, 7], {z12.b - z15.b}, z11.b[2] // 11000001-01011011-10101001-10110111 +// CHECK-INST: uvdot za.s[w9, 7, vgx4], { z12.b - z15.b }, z11.b[2] +// CHECK-ENCODING: [0xb7,0xa9,0x5b,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c15ba9b7 + + +uvdot za.d[w8, 0, vgx4], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10001000-00011000 +// CHECK-INST: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x88,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08818 + +uvdot za.d[w8, 0], {z0.h - z3.h}, z0.h[0] // 11000001-11010000-10001000-00011000 +// CHECK-INST: uvdot za.d[w8, 0, vgx4], { z0.h - z3.h }, z0.h[0] +// CHECK-ENCODING: [0x18,0x88,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08818 + +uvdot za.d[w10, 5, vgx4], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11001101-00011101 +// CHECK-INST: uvdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xcd,0xd5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d5cd1d + +uvdot za.d[w10, 5], {z8.h - z11.h}, z5.h[1] // 11000001-11010101-11001101-00011101 +// CHECK-INST: uvdot za.d[w10, 5, vgx4], { z8.h - z11.h }, z5.h[1] +// CHECK-ENCODING: [0x1d,0xcd,0xd5,0xc1] +// CHECK-ERROR: 
instruction requires: sme2 +// CHECK-UNKNOWN: c1d5cd1d + +uvdot za.d[w11, 7, vgx4], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11101101-10011111 +// CHECK-INST: uvdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x9f,0xed,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8ed9f + +uvdot za.d[w11, 7], {z12.h - z15.h}, z8.h[1] // 11000001-11011000-11101101-10011111 +// CHECK-INST: uvdot za.d[w11, 7, vgx4], { z12.h - z15.h }, z8.h[1] +// CHECK-ENCODING: [0x9f,0xed,0xd8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d8ed9f + +uvdot za.d[w11, 7, vgx4], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11101111-10011111 +// CHECK-INST: uvdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x9f,0xef,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfef9f + +uvdot za.d[w11, 7], {z28.h - z31.h}, z15.h[1] // 11000001-11011111-11101111-10011111 +// CHECK-INST: uvdot za.d[w11, 7, vgx4], { z28.h - z31.h }, z15.h[1] +// CHECK-ENCODING: [0x9f,0xef,0xdf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dfef9f + +uvdot za.d[w8, 5, vgx4], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10001110-00011101 +// CHECK-INST: uvdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x8e,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08e1d + +uvdot za.d[w8, 5], {z16.h - z19.h}, z0.h[1] // 11000001-11010000-10001110-00011101 +// CHECK-INST: uvdot za.d[w8, 5, vgx4], { z16.h - z19.h }, z0.h[1] +// CHECK-ENCODING: [0x1d,0x8e,0xd0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d08e1d + +uvdot za.d[w8, 1, vgx4], {z0.h - z3.h}, z14.h[1] // 11000001-11011110-10001100-00011001 +// CHECK-INST: uvdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x8c,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8c19 + +uvdot za.d[w8, 1], {z0.h - 
z3.h}, z14.h[1] // 11000001-11011110-10001100-00011001 +// CHECK-INST: uvdot za.d[w8, 1, vgx4], { z0.h - z3.h }, z14.h[1] +// CHECK-ENCODING: [0x19,0x8c,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8c19 + +uvdot za.d[w10, 0, vgx4], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11001110-00011000 +// CHECK-INST: uvdot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xce,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4ce18 + +uvdot za.d[w10, 0], {z16.h - z19.h}, z4.h[1] // 11000001-11010100-11001110-00011000 +// CHECK-INST: uvdot za.d[w10, 0, vgx4], { z16.h - z19.h }, z4.h[1] +// CHECK-ENCODING: [0x18,0xce,0xd4,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d4ce18 + +uvdot za.d[w8, 0, vgx4], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10001001-10011000 +// CHECK-INST: uvdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x89,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28998 + +uvdot za.d[w8, 0], {z12.h - z15.h}, z2.h[0] // 11000001-11010010-10001001-10011000 +// CHECK-INST: uvdot za.d[w8, 0, vgx4], { z12.h - z15.h }, z2.h[0] +// CHECK-ENCODING: [0x98,0x89,0xd2,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d28998 + +uvdot za.d[w10, 1, vgx4], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11001000-00011001 +// CHECK-INST: uvdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc8,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac819 + +uvdot za.d[w10, 1], {z0.h - z3.h}, z10.h[0] // 11000001-11011010-11001000-00011001 +// CHECK-INST: uvdot za.d[w10, 1, vgx4], { z0.h - z3.h }, z10.h[0] +// CHECK-ENCODING: [0x19,0xc8,0xda,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dac819 + +uvdot za.d[w8, 5, vgx4], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10001010-10011101 +// CHECK-INST: uvdot za.d[w8, 5, vgx4], 
{ z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x9d,0x8a,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8a9d + +uvdot za.d[w8, 5], {z20.h - z23.h}, z14.h[0] // 11000001-11011110-10001010-10011101 +// CHECK-INST: uvdot za.d[w8, 5, vgx4], { z20.h - z23.h }, z14.h[0] +// CHECK-ENCODING: [0x9d,0x8a,0xde,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1de8a9d + +uvdot za.d[w11, 2, vgx4], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11101101-00011010 +// CHECK-INST: uvdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0xed,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1ed1a + +uvdot za.d[w11, 2], {z8.h - z11.h}, z1.h[1] // 11000001-11010001-11101101-00011010 +// CHECK-INST: uvdot za.d[w11, 2, vgx4], { z8.h - z11.h }, z1.h[1] +// CHECK-ENCODING: [0x1a,0xed,0xd1,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1d1ed1a + +uvdot za.d[w9, 7, vgx4], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10101001-10011111 +// CHECK-INST: uvdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0xa9,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba99f + +uvdot za.d[w9, 7], {z12.h - z15.h}, z11.h[0] // 11000001-11011011-10101001-10011111 +// CHECK-INST: uvdot za.d[w9, 7, vgx4], { z12.h - z15.h }, z11.h[0] +// CHECK-ENCODING: [0x9f,0xa9,0xdb,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1dba99f + diff --git a/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for 
instruction +// CHECK-NEXT: uzp {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: uzp {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: uzp {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: uzp {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/uzp.s b/llvm/test/MC/AArch64/SME2/uzp.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/uzp.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +uzp {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000001 +// CHECK-INST: uzp { z0.q, z1.q }, z0.q, z0.q +// 
CHECK-ENCODING: [0x01,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d401 + +uzp {z20.q - z21.q}, z10.q, z21.q // 11000001-00110101-11010101-01010101 +// CHECK-INST: uzp { z20.q, z21.q }, z10.q, z21.q +// CHECK-ENCODING: [0x55,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d555 + +uzp {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110111 +// CHECK-INST: uzp { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb7,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b7 + +uzp {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111111 +// CHECK-INST: uzp { z30.q, z31.q }, z31.q, z31.q +// CHECK-ENCODING: [0xff,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7ff + + +uzp {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000001 +// CHECK-INST: uzp { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x01,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d001 + +uzp {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010101 +// CHECK-INST: uzp { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x55,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d155 + +uzp {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110111 +// CHECK-INST: uzp { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb7,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b7 + +uzp {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111111 +// CHECK-INST: uzp { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xff,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3ff + + +uzp {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000001 +// CHECK-INST: uzp { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x01,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// 
CHECK-UNKNOWN: c1a0d001 + +uzp {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010101 +// CHECK-INST: uzp { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x55,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5d155 + +uzp {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110111 +// CHECK-INST: uzp { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb7,0xd1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b7 + +uzp {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111111 +// CHECK-INST: uzp { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: [0xff,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfd3ff + + +uzp {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000001 +// CHECK-INST: uzp { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x01,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d001 + +uzp {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010101 +// CHECK-INST: uzp { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x55,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d155 + +uzp {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110111 +// CHECK-INST: uzp { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb7,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b7 + +uzp {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111111 +// CHECK-INST: uzp { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xff,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3ff + + +uzp {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000001 +// CHECK-INST: uzp { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x01,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d001 + +uzp {z20.b - z21.b}, z10.b, z21.b // 
11000001-00110101-11010001-01010101 +// CHECK-INST: uzp { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x55,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d155 + +uzp {z22.b - z23.b}, z13.b, z8.b // 11000001-00101000-11010001-10110111 +// CHECK-INST: uzp { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb7,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b7 + +uzp {z30.b - z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111111 +// CHECK-INST: uzp { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xff,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd3ff + + +uzp {z0.q - z3.q}, {z0.q - z3.q} // 11000001-00110111-11100000-00000010 +// CHECK-INST: uzp { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x02,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e002 + +uzp {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x16,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e116 + +uzp {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010110 +// CHECK-INST: uzp { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x96,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e196 + +uzp {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011110 +// CHECK-INST: uzp { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9e,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39e + + +uzp {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000010 +// CHECK-INST: uzp { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x02,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e002 + +uzp {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010110 +// 
CHECK-INST: uzp { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x16,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e116 + +uzp {z20.h - z23.h}, {z12.h - z15.h} // 11000001-01110110-11100001-10010110 +// CHECK-INST: uzp { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x96,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e196 + +uzp {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01110110-11100011-10011110 +// CHECK-INST: uzp { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9e,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39e + + +uzp {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10110110-11100000-00000010 +// CHECK-INST: uzp { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x02,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e002 + +uzp {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x16,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e116 + +uzp {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010110 +// CHECK-INST: uzp { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x96,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e196 + +uzp {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011110 +// CHECK-INST: uzp { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9e,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39e + + +uzp {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000010 +// CHECK-INST: uzp { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x02,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e002 + +uzp {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010110 +// CHECK-INST: uzp 
{ z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x16,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e116 + +uzp {z20.d - z23.d}, {z12.d - z15.d} // 11000001-11110110-11100001-10010110 +// CHECK-INST: uzp { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x96,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e196 + +uzp {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11110110-11100011-10011110 +// CHECK-INST: uzp { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9e,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39e + + +uzp {z0.b - z3.b}, {z0.b - z3.b} // 11000001-00110110-11100000-00000010 +// CHECK-INST: uzp { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x02,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e002 + +uzp {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x16,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e116 + +uzp {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010110 +// CHECK-INST: uzp { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x96,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e196 + +uzp {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011110 +// CHECK-INST: uzp { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9e,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39e + diff --git a/llvm/test/MC/AArch64/SME2/zero.s b/llvm/test/MC/AArch64/SME2/zero.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zero.s @@ -0,0 +1,20 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 
-show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zero {zt0} // 11000000-01001000-00000000-00000001 +// CHECK-INST: zero { zt0 } +// CHECK-ENCODING: [0x01,0x00,0x48,0xc0] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c0480001 + diff --git a/llvm/test/MC/AArch64/SME2/zip-diagnostics.s b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip-diagnostics.s @@ -0,0 +1,25 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +zip {z0.q-z2.q}, z0.q, z0.q +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: zip {z0.q-z2.q}, z0.q, z0.q +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z21.h-z22.h}, z10.h, z21.h +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: zip {z21.h-z22.h}, z10.h, z21.h +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid number of vectors +// CHECK-NEXT: zip {z0.s-z4.s}, {z0.s-z3.s} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid 
vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: zip {z20.b-z23.b}, {z9.b-z12.b} +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SME2/zip.s b/llvm/test/MC/AArch64/SME2/zip.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SME2/zip.s @@ -0,0 +1,263 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + + +zip {z0.q - z1.q}, z0.q, z0.q // 11000001-00100000-11010100-00000000 +// CHECK-INST: zip { z0.q, z1.q }, z0.q, z0.q +// CHECK-ENCODING: [0x00,0xd4,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d400 + +zip {z20.q - z21.q}, z10.q, z21.q // 11000001-00110101-11010101-01010100 +// CHECK-INST: zip { z20.q, z21.q }, z10.q, z21.q +// CHECK-ENCODING: [0x54,0xd5,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d554 + +zip {z22.q - z23.q}, z13.q, z8.q // 11000001-00101000-11010101-10110110 +// CHECK-INST: zip { z22.q, z23.q }, z13.q, z8.q +// CHECK-ENCODING: [0xb6,0xd5,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d5b6 + +zip {z30.q - z31.q}, z31.q, z31.q // 11000001-00111111-11010111-11111110 +// 
CHECK-INST: zip { z30.q, z31.q }, z31.q, z31.q +// CHECK-ENCODING: [0xfe,0xd7,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c13fd7fe + + +zip {z0.h - z1.h}, z0.h, z0.h // 11000001-01100000-11010000-00000000 +// CHECK-INST: zip { z0.h, z1.h }, z0.h, z0.h +// CHECK-ENCODING: [0x00,0xd0,0x60,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c160d000 + +zip {z20.h - z21.h}, z10.h, z21.h // 11000001-01110101-11010001-01010100 +// CHECK-INST: zip { z20.h, z21.h }, z10.h, z21.h +// CHECK-ENCODING: [0x54,0xd1,0x75,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c175d154 + +zip {z22.h - z23.h}, z13.h, z8.h // 11000001-01101000-11010001-10110110 +// CHECK-INST: zip { z22.h, z23.h }, z13.h, z8.h +// CHECK-ENCODING: [0xb6,0xd1,0x68,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c168d1b6 + +zip {z30.h - z31.h}, z31.h, z31.h // 11000001-01111111-11010011-11111110 +// CHECK-INST: zip { z30.h, z31.h }, z31.h, z31.h +// CHECK-ENCODING: [0xfe,0xd3,0x7f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c17fd3fe + + +zip {z0.s - z1.s}, z0.s, z0.s // 11000001-10100000-11010000-00000000 +// CHECK-INST: zip { z0.s, z1.s }, z0.s, z0.s +// CHECK-ENCODING: [0x00,0xd0,0xa0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a0d000 + +zip {z20.s - z21.s}, z10.s, z21.s // 11000001-10110101-11010001-01010100 +// CHECK-INST: zip { z20.s, z21.s }, z10.s, z21.s +// CHECK-ENCODING: [0x54,0xd1,0xb5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b5d154 + +zip {z22.s - z23.s}, z13.s, z8.s // 11000001-10101000-11010001-10110110 +// CHECK-INST: zip { z22.s, z23.s }, z13.s, z8.s +// CHECK-ENCODING: [0xb6,0xd1,0xa8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1a8d1b6 + +zip {z30.s - z31.s}, z31.s, z31.s // 11000001-10111111-11010011-11111110 +// CHECK-INST: zip { z30.s, z31.s }, z31.s, z31.s +// CHECK-ENCODING: 
[0xfe,0xd3,0xbf,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1bfd3fe + + +zip {z0.d - z1.d}, z0.d, z0.d // 11000001-11100000-11010000-00000000 +// CHECK-INST: zip { z0.d, z1.d }, z0.d, z0.d +// CHECK-ENCODING: [0x00,0xd0,0xe0,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e0d000 + +zip {z20.d - z21.d}, z10.d, z21.d // 11000001-11110101-11010001-01010100 +// CHECK-INST: zip { z20.d, z21.d }, z10.d, z21.d +// CHECK-ENCODING: [0x54,0xd1,0xf5,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f5d154 + +zip {z22.d - z23.d}, z13.d, z8.d // 11000001-11101000-11010001-10110110 +// CHECK-INST: zip { z22.d, z23.d }, z13.d, z8.d +// CHECK-ENCODING: [0xb6,0xd1,0xe8,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1e8d1b6 + +zip {z30.d - z31.d}, z31.d, z31.d // 11000001-11111111-11010011-11111110 +// CHECK-INST: zip { z30.d, z31.d }, z31.d, z31.d +// CHECK-ENCODING: [0xfe,0xd3,0xff,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1ffd3fe + + +zip {z0.b - z1.b}, z0.b, z0.b // 11000001-00100000-11010000-00000000 +// CHECK-INST: zip { z0.b, z1.b }, z0.b, z0.b +// CHECK-ENCODING: [0x00,0xd0,0x20,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c120d000 + +zip {z20.b, z21.b}, z10.b, z21.b // 11000001-00110101-11010001-01010100 +// CHECK-INST: zip { z20.b, z21.b }, z10.b, z21.b +// CHECK-ENCODING: [0x54,0xd1,0x35,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c135d154 + +zip {z22.b - z23.b}, z13.b, z8.b // 11000001-00101000-11010001-10110110 +// CHECK-INST: zip { z22.b, z23.b }, z13.b, z8.b +// CHECK-ENCODING: [0xb6,0xd1,0x28,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c128d1b6 + +zip {z30.b - z31.b}, z31.b, z31.b // 11000001-00111111-11010011-11111110 +// CHECK-INST: zip { z30.b, z31.b }, z31.b, z31.b +// CHECK-ENCODING: [0xfe,0xd3,0x3f,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: 
c13fd3fe + + +zip {z0.q - z3.q}, {z0.q - z3.q} // 11000001-00110111-11100000-00000000 +// CHECK-INST: zip { z0.q - z3.q }, { z0.q - z3.q } +// CHECK-ENCODING: [0x00,0xe0,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e000 + +zip {z20.q - z23.q}, {z8.q - z11.q} // 11000001-00110111-11100001-00010100 +// CHECK-INST: zip { z20.q - z23.q }, { z8.q - z11.q } +// CHECK-ENCODING: [0x14,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e114 + +zip {z20.q - z23.q}, {z12.q - z15.q} // 11000001-00110111-11100001-10010100 +// CHECK-INST: zip { z20.q - z23.q }, { z12.q - z15.q } +// CHECK-ENCODING: [0x94,0xe1,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e194 + +zip {z28.q - z31.q}, {z28.q - z31.q} // 11000001-00110111-11100011-10011100 +// CHECK-INST: zip { z28.q - z31.q }, { z28.q - z31.q } +// CHECK-ENCODING: [0x9c,0xe3,0x37,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c137e39c + + +zip {z0.h - z3.h}, {z0.h - z3.h} // 11000001-01110110-11100000-00000000 +// CHECK-INST: zip { z0.h - z3.h }, { z0.h - z3.h } +// CHECK-ENCODING: [0x00,0xe0,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e000 + +zip {z20.h - z23.h}, {z8.h - z11.h} // 11000001-01110110-11100001-00010100 +// CHECK-INST: zip { z20.h - z23.h }, { z8.h - z11.h } +// CHECK-ENCODING: [0x14,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e114 + +zip {z20.h - z23.h}, {z12.h - z15.h} // 11000001-01110110-11100001-10010100 +// CHECK-INST: zip { z20.h - z23.h }, { z12.h - z15.h } +// CHECK-ENCODING: [0x94,0xe1,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e194 + +zip {z28.h - z31.h}, {z28.h - z31.h} // 11000001-01110110-11100011-10011100 +// CHECK-INST: zip { z28.h - z31.h }, { z28.h - z31.h } +// CHECK-ENCODING: [0x9c,0xe3,0x76,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c176e39c + + 
+zip {z0.s - z3.s}, {z0.s - z3.s} // 11000001-10110110-11100000-00000000 +// CHECK-INST: zip { z0.s - z3.s }, { z0.s - z3.s } +// CHECK-ENCODING: [0x00,0xe0,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e000 + +zip {z20.s - z23.s}, {z8.s - z11.s} // 11000001-10110110-11100001-00010100 +// CHECK-INST: zip { z20.s - z23.s }, { z8.s - z11.s } +// CHECK-ENCODING: [0x14,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e114 + +zip {z20.s - z23.s}, {z12.s - z15.s} // 11000001-10110110-11100001-10010100 +// CHECK-INST: zip { z20.s - z23.s }, { z12.s - z15.s } +// CHECK-ENCODING: [0x94,0xe1,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e194 + +zip {z28.s - z31.s}, {z28.s - z31.s} // 11000001-10110110-11100011-10011100 +// CHECK-INST: zip { z28.s - z31.s }, { z28.s - z31.s } +// CHECK-ENCODING: [0x9c,0xe3,0xb6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1b6e39c + + +zip {z0.d - z3.d}, {z0.d - z3.d} // 11000001-11110110-11100000-00000000 +// CHECK-INST: zip { z0.d - z3.d }, { z0.d - z3.d } +// CHECK-ENCODING: [0x00,0xe0,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e000 + +zip {z20.d - z23.d}, {z8.d - z11.d} // 11000001-11110110-11100001-00010100 +// CHECK-INST: zip { z20.d - z23.d }, { z8.d - z11.d } +// CHECK-ENCODING: [0x14,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e114 + +zip {z20.d - z23.d}, {z12.d - z15.d} // 11000001-11110110-11100001-10010100 +// CHECK-INST: zip { z20.d - z23.d }, { z12.d - z15.d } +// CHECK-ENCODING: [0x94,0xe1,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e194 + +zip {z28.d - z31.d}, {z28.d - z31.d} // 11000001-11110110-11100011-10011100 +// CHECK-INST: zip { z28.d - z31.d }, { z28.d - z31.d } +// CHECK-ENCODING: [0x9c,0xe3,0xf6,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c1f6e39c + + +zip {z0.b - 
z3.b}, {z0.b - z3.b} // 11000001-00110110-11100000-00000000 +// CHECK-INST: zip { z0.b - z3.b }, { z0.b - z3.b } +// CHECK-ENCODING: [0x00,0xe0,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e000 + +zip {z20.b - z23.b}, {z8.b - z11.b} // 11000001-00110110-11100001-00010100 +// CHECK-INST: zip { z20.b - z23.b }, { z8.b - z11.b } +// CHECK-ENCODING: [0x14,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e114 + +zip {z20.b - z23.b}, {z12.b - z15.b} // 11000001-00110110-11100001-10010100 +// CHECK-INST: zip { z20.b - z23.b }, { z12.b - z15.b } +// CHECK-ENCODING: [0x94,0xe1,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e194 + +zip {z28.b - z31.b}, {z28.b - z31.b} // 11000001-00110110-11100011-10011100 +// CHECK-INST: zip { z28.b - z31.b }, { z28.b - z31.b } +// CHECK-ENCODING: [0x9c,0xe3,0x36,0xc1] +// CHECK-ERROR: instruction requires: sme2 +// CHECK-UNKNOWN: c136e39c + diff --git a/llvm/test/MC/AArch64/SVE/ld2b-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld2b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld2b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld2b-diagnostics.s @@ -81,7 +81,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld2b { z0.b, z2.b }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: ld2b { z0.b, z2.b }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld2d-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld2d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld2d-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld2d-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld2d { z0.d, z2.d }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: ld2d { z0.d, z2.d }, p0/z, [x0] // CHECK-NOT: 
[[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld2h-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld2h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld2h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld2h-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld2h { z0.h, z2.h }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: ld2h { z0.h, z2.h }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld2w-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld2w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld2w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld2w-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld2w { z0.s, z2.s }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: ld2w { z0.s, z2.s }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld3b-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld3b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld3b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld3b-diagnostics.s @@ -81,7 +81,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld3b { z0.b, z1.b, z3.b }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld3b { z0.b, z1.b, z3.b }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld3d-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld3d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld3d-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld3d-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld3d { z0.d, z1.d, z3.d }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld3d { z0.d, z1.d, z3.d }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld3h-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld3h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld3h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld3h-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld3h { z0.h, z1.h, z3.h }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld3h { z0.h, z1.h, z3.h }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld3w-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld3w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld3w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld3w-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld3w { z0.s, z1.s, z3.s }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld3w { z0.s, z1.s, z3.s }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld4b-diagnostics.s @@ -81,7 +81,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld4b { z0.b, z1.b, z3.b, z5.b }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld4b { z0.b, z1.b, z3.b, z5.b }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s +++ 
b/llvm/test/MC/AArch64/SVE/ld4d-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld4d { z0.d, z1.d, z3.d, z5.d }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld4d { z0.d, z1.d, z3.d, z5.d }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld4h-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld4h { z0.h, z1.h, z3.h, z5.h }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld4h { z0.h, z1.h, z3.h, z5.h }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s b/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/ld4w-diagnostics.s @@ -86,7 +86,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ld4w { z0.s, z1.s, z3.s, z5.s }, p0/z, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: ld4w { z0.s, z1.s, z3.s, z5.s }, p0/z, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st2b-diagnostics.s b/llvm/test/MC/AArch64/SVE/st2b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st2b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st2b-diagnostics.s @@ -91,7 +91,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st2b { z0.b, z2.b }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: st2b { z0.b, z2.b }, p0, 
[x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st2d-diagnostics.s b/llvm/test/MC/AArch64/SVE/st2d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st2d-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st2d-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st2d { z0.d, z2.d }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: st2d { z0.d, z2.d }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st2h-diagnostics.s b/llvm/test/MC/AArch64/SVE/st2h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st2h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st2h-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st2h { z0.h, z2.h }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: st2h { z0.h, z2.h }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st2w-diagnostics.s b/llvm/test/MC/AArch64/SVE/st2w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st2w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st2w-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st2w { z0.s, z2.s }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: st2w { z0.s, z2.s }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st3b-diagnostics.s b/llvm/test/MC/AArch64/SVE/st3b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st3b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st3b-diagnostics.s @@ -91,7 +91,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st3b { z0.b, z1.b, z3.b }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: 
error: registers must have the same sequential stride // CHECK-NEXT: st3b { z0.b, z1.b, z3.b }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st3d-diagnostics.s b/llvm/test/MC/AArch64/SVE/st3d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st3d-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st3d-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st3d { z0.d, z1.d, z3.d }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st3d { z0.d, z1.d, z3.d }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st3h-diagnostics.s b/llvm/test/MC/AArch64/SVE/st3h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st3h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st3h-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st3h { z0.h, z1.h, z3.h }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st3h { z0.h, z1.h, z3.h }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st3w-diagnostics.s b/llvm/test/MC/AArch64/SVE/st3w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st3w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st3w-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st3w { z0.s, z1.s, z3.s }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st3w { z0.s, z1.s, z3.s }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st4b-diagnostics.s b/llvm/test/MC/AArch64/SVE/st4b-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st4b-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st4b-diagnostics.s @@ -91,7 +91,7 @@ // 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st4b { z0.b, z1.b, z3.b, z5.b }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st4b { z0.b, z1.b, z3.b, z5.b }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st4d-diagnostics.s b/llvm/test/MC/AArch64/SVE/st4d-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st4d-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st4d-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st4d { z0.d, z1.d, z3.d, z5.d }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st4d { z0.d, z1.d, z3.d, z5.d }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st4h-diagnostics.s b/llvm/test/MC/AArch64/SVE/st4h-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st4h-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st4h-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st4h { z0.h, z1.h, z3.h, z5.h }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st4h { z0.h, z1.h, z3.h, z5.h }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE/st4w-diagnostics.s b/llvm/test/MC/AArch64/SVE/st4w-diagnostics.s --- a/llvm/test/MC/AArch64/SVE/st4w-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE/st4w-diagnostics.s @@ -96,7 +96,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: st4w { z0.s, z1.s, z3.s, z5.s }, p0, [x0] -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must have the same sequential stride // CHECK-NEXT: st4w { z0.s, z1.s, z3.s, z5.s }, p0, [x0] // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: 
diff --git a/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s b/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s --- a/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/ext-diagnostics.s @@ -58,7 +58,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: ext z0.b, { z1.b, z31.b }, #0 -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: ext z0.b, { z1.b, z31.b }, #0 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s b/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s --- a/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/splice-diagnostics.s @@ -34,7 +34,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: splice z0.b, p0, { z1.b, z31.b } -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: splice z0.b, p0, { z1.b, z31.b } // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s b/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s --- a/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s +++ b/llvm/test/MC/AArch64/SVE2/tbl-diagnostics.s @@ -25,7 +25,7 @@ // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: tbl z0.d, { z1.d, z21.d }, z3.d -// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: registers must be sequential +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction // CHECK-NEXT: tbl z0.d, { z1.d, z21.d }, z3.d // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1b-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1b-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ld1b 
{z0.b-z2.b}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1b {z0.b-z2.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z1.b-z4.b}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ld1b {z1.b-z4.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z7.b-z8.b}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ld1b {z7.b-z8.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ld1b {z0.b-z1.b}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1b {z0.b-z1.b}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z0.b-z1.b}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ld1b {z0.b-z1.b}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1b {z0.b-z3.b}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1b {z0.b-z3.b}, pn8/z, [x0, 
#-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z0.b-z3.b}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1b {z0.b-z3.b}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1b {z0.b-z3.b}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1b {z0.b-z3.b}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1b.s b/llvm/test/MC/AArch64/SVE2p1/ld1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1b.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ld1b {z0.b-z1.b}, pn8/z, [x0, x0] // 10100000-00000000-00000000-00000000 +// CHECK-INST: ld1b { z0.b, z1.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0000000 + +ld1b {z20.b-z21.b}, pn13/z, [x10, x21] // 10100000-00010101-00010101-01010100 +// CHECK-INST: 
ld1b { z20.b, z21.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x54,0x15,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0151554 + +ld1b {z22.b-z23.b}, pn11/z, [x13, x8] // 10100000-00001000-00001101-10110110 +// CHECK-INST: ld1b { z22.b, z23.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb6,0x0d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0080db6 + +ld1b {z30.b-z31.b}, pn15/z, [sp, xzr] // 10100000-00011111-00011111-11111110 +// CHECK-INST: ld1b { z30.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xfe,0x1f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f1ffe + +ld1b {z0.b-z1.b}, pn8/z, [x0] // 10100000-01000000-00000000-00000000 +// CHECK-INST: ld1b { z0.b, z1.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x00,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0400000 + +ld1b {z20.b-z21.b}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-00010101-01010100 +// CHECK-INST: ld1b { z20.b, z21.b }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x15,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0451554 + +ld1b {z22.b-z23.b}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-00001101-10110110 +// CHECK-INST: ld1b { z22.b, z23.b }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x0d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0480db6 + +ld1b {z30.b-z31.b}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-00011111-11111110 +// CHECK-INST: ld1b { z30.b, z31.b }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x1f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f1ffe + +ld1b {z0.b-z3.b}, pn8/z, [x0, x0] // 10100000-00000000-10000000-00000000 +// CHECK-INST: ld1b { z0.b - z3.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// 
CHECK-UNKNOWN: a0008000 + +ld1b {z20.b-z23.b}, pn13/z, [x10, x21] // 10100000-00010101-10010101-01010100 +// CHECK-INST: ld1b { z20.b - z23.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x54,0x95,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0159554 + +ld1b {z20.b-z23.b}, pn11/z, [x13, x8] // 10100000-00001000-10001101-10110100 +// CHECK-INST: ld1b { z20.b - z23.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb4,0x8d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0088db4 + +ld1b {z28.b-z31.b}, pn15/z, [sp, xzr] // 10100000-00011111-10011111-11111100 +// CHECK-INST: ld1b { z28.b - z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xfc,0x9f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f9ffc + +ld1b {z0.b-z3.b}, pn8/z, [x0] // 10100000-01000000-10000000-00000000 +// CHECK-INST: ld1b { z0.b - z3.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x80,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0408000 + +ld1b {z20.b-z23.b}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-10010101-01010100 +// CHECK-INST: ld1b { z20.b - z23.b }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0x95,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0459554 + +ld1b {z20.b-z23.b}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-10001101-10110100 +// CHECK-INST: ld1b { z20.b - z23.b }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0x8d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0488db4 + +ld1b {z28.b-z31.b}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-10011111-11111100 +// CHECK-INST: ld1b { z28.b - z31.b }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0x9f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f9ffc diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d-diagnostics.s 
b/llvm/test/MC/AArch64/SVE2p1/ld1d-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ld1d {z0.d-z2.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1d {z0.d-z2.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z1.d-z4.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ld1d {z1.d-z4.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z7.d-z8.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ld1d {z7.d-z8.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ld1d {z0.d-z1.d}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1d {z0.d-z1.d}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z0.d-z1.d}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ld1d {z0.d-z1.d}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1d 
{z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1d {z0.d-z3.d}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1d {z0.d-z3.d}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z0.d-z3.d}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1d {z0.d-z3.d}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1d {z0.d-z3.d}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1d {z0.d-z3.d}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1d.s b/llvm/test/MC/AArch64/SVE2p1/ld1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1d.s @@ -0,0 +1,111 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s 
--check-prefixes=CHECK-ENCODING,CHECK-INST + +ld1d {z0.d-z1.d}, pn8/z, [x0, x0, lsl #3] // 10100000-00000000-01100000-00000000 +// CHECK-INST: ld1d { z0.d, z1.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x60,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0006000 + +ld1d {z20.d-z21.d}, pn13/z, [x10, x21, lsl #3] // 10100000-00010101-01110101-01010100 +// CHECK-INST: ld1d { z20.d, z21.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x54,0x75,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0157554 + +ld1d {z22.d-z23.d}, pn11/z, [x13, x8, lsl #3] // 10100000-00001000-01101101-10110110 +// CHECK-INST: ld1d { z22.d, z23.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb6,0x6d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0086db6 + +ld1d {z30.d-z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100000-00011111-01111111-11111110 +// CHECK-INST: ld1d { z30.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfe,0x7f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f7ffe + +ld1d {z0.d-z1.d}, pn8/z, [x0] // 10100000-01000000-01100000-00000000 +// CHECK-INST: ld1d { z0.d, z1.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x60,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0406000 + +ld1d {z20.d-z21.d}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-01110101-01010100 +// CHECK-INST: ld1d { z20.d, z21.d }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x75,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0457554 + +ld1d {z22.d-z23.d}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-01101101-10110110 +// CHECK-INST: ld1d { z22.d, z23.d }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x6d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0486db6 + +ld1d {z30.d-z31.d}, pn15/z, [sp, #-2, mul vl] 
// 10100000-01001111-01111111-11111110 +// CHECK-INST: ld1d { z30.d, z31.d }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x7f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f7ffe + +ld1d {z0.d-z3.d}, pn8/z, [x0, x0, lsl #3] // 10100000-00000000-11100000-00000000 +// CHECK-INST: ld1d { z0.d - z3.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0xe0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000e000 + +ld1d {z20.d-z23.d}, pn13/z, [x10, x21, lsl #3] // 10100000-00010101-11110101-01010100 +// CHECK-INST: ld1d { z20.d - z23.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x54,0xf5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015f554 + +ld1d {z20.d-z23.d}, pn11/z, [x13, x8, lsl #3] // 10100000-00001000-11101101-10110100 +// CHECK-INST: ld1d { z20.d - z23.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb4,0xed,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008edb4 + +ld1d {z28.d-z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100000-00011111-11111111-11111100 +// CHECK-INST: ld1d { z28.d - z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfc,0xff,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01ffffc + +ld1d {z0.d-z3.d}, pn8/z, [x0] // 10100000-01000000-11100000-00000000 +// CHECK-INST: ld1d { z0.d - z3.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a040e000 + +ld1d {z20.d-z23.d}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-11110101-01010100 +// CHECK-INST: ld1d { z20.d - z23.d }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xf5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045f554 + +ld1d {z20.d-z23.d}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-11101101-10110100 +// CHECK-INST: ld1d { z20.d - z23.d }, pn11/z, 
[x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xed,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048edb4 + +ld1d {z28.d-z31.d}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-11111111-11111100 +// CHECK-INST: ld1d { z28.d - z31.d }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xff,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04ffffc + diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1h-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1h-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ld1h {z0.h-z2.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1h {z0.h-z2.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z1.h-z4.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ld1h {z1.h-z4.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z7.h-z8.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ld1h {z7.h-z8.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ld1h {z0.h-z1.h}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: 
ld1h {z0.h-z1.h}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z0.h-z1.h}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ld1h {z0.h-z1.h}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1h {z0.h-z3.h}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1h {z0.h-z3.h}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z0.h-z3.h}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1h {z0.h-z3.h}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1h {z0.h-z3.h}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1h {z0.h-z3.h}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1h.s b/llvm/test/MC/AArch64/SVE2p1/ld1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1h.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s 
\ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ld1h {z0.h-z1.h}, pn8/z, [x0, x0, lsl #1] // 10100000-00000000-00100000-00000000 +// CHECK-INST: ld1h { z0.h, z1.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0x20,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0002000 + +ld1h {z20.h-z21.h}, pn13/z, [x10, x21, lsl #1] // 10100000-00010101-00110101-01010100 +// CHECK-INST: ld1h { z20.h, z21.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x54,0x35,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0153554 + +ld1h {z22.h-z23.h}, pn11/z, [x13, x8, lsl #1] // 10100000-00001000-00101101-10110110 +// CHECK-INST: ld1h { z22.h, z23.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb6,0x2d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0082db6 + +ld1h {z30.h-z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100000-00011111-00111111-11111110 +// CHECK-INST: ld1h { z30.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfe,0x3f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f3ffe + +ld1h {z0.h-z1.h}, pn8/z, [x0] // 10100000-01000000-00100000-00000000 +// CHECK-INST: ld1h { z0.h, z1.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x20,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0402000 + +ld1h {z20.h-z21.h}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-00110101-01010100 +// CHECK-INST: ld1h { z20.h, z21.h }, pn13/z, [x10, #10, mul vl] +// 
CHECK-ENCODING: [0x54,0x35,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0453554 + +ld1h {z22.h-z23.h}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-00101101-10110110 +// CHECK-INST: ld1h { z22.h, z23.h }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x2d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0482db6 + +ld1h {z30.h-z31.h}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-00111111-11111110 +// CHECK-INST: ld1h { z30.h, z31.h }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x3f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f3ffe + +ld1h {z0.h-z3.h}, pn8/z, [x0, x0, lsl #1] // 10100000-00000000-10100000-00000000 +// CHECK-INST: ld1h { z0.h - z3.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0xa0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000a000 + +ld1h {z20.h-z23.h}, pn13/z, [x10, x21, lsl #1] // 10100000-00010101-10110101-01010100 +// CHECK-INST: ld1h { z20.h - z23.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x54,0xb5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015b554 + +ld1h {z20.h-z23.h}, pn11/z, [x13, x8, lsl #1] // 10100000-00001000-10101101-10110100 +// CHECK-INST: ld1h { z20.h - z23.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb4,0xad,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008adb4 + +ld1h {z28.h-z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100000-00011111-10111111-11111100 +// CHECK-INST: ld1h { z28.h - z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfc,0xbf,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01fbffc + +ld1h {z0.h-z3.h}, pn8/z, [x0] // 10100000-01000000-10100000-00000000 +// CHECK-INST: ld1h { z0.h - z3.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xa0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or 
sve2p1 +// CHECK-UNKNOWN: a040a000 + +ld1h {z20.h-z23.h}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-10110101-01010100 +// CHECK-INST: ld1h { z20.h - z23.h }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xb5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045b554 + +ld1h {z20.h-z23.h}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-10101101-10110100 +// CHECK-INST: ld1h { z20.h - z23.h }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xad,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048adb4 + +ld1h {z28.h-z31.h}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-10111111-11111100 +// CHECK-INST: ld1h { z28.h - z31.h }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xbf,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04fbffc diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ld1w-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ld1w {z0.s-z2.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ld1w {z0.s-z2.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z1.s-z4.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ld1w {z1.s-z4.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z7.s-z8.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first 
vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ld1w {z7.s-z8.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ld1w {z0.s-z1.s}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1w {z0.s-z1.s}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z0.s-z1.s}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ld1w {z0.s-z1.s}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ld1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ld1w {z0.s-z3.s}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1w {z0.s-z3.s}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z0.s-z3.s}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1w {z0.s-z3.s}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ld1w {z0.s-z3.s}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ld1w {z0.s-z3.s}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ld1w.s b/llvm/test/MC/AArch64/SVE2p1/ld1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ld1w.s @@ -0,0 +1,110 @@ 
+// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ld1w {z0.s-z1.s}, pn8/z, [x0, x0, lsl #2] // 10100000-00000000-01000000-00000000 +// CHECK-INST: ld1w { z0.s, z1.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0004000 + +ld1w {z20.s-z21.s}, pn13/z, [x10, x21, lsl #2] // 10100000-00010101-01010101-01010100 +// CHECK-INST: ld1w { z20.s, z21.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x54,0x55,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0155554 + +ld1w {z22.s-z23.s}, pn11/z, [x13, x8, lsl #2] // 10100000-00001000-01001101-10110110 +// CHECK-INST: ld1w { z22.s, z23.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb6,0x4d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0084db6 + +ld1w {z30.s-z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100000-00011111-01011111-11111110 +// CHECK-INST: ld1w { z30.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfe,0x5f,0x1f,0xa0] +// CHECK-ERROR: instruction 
requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f5ffe + +ld1w {z0.s-z1.s}, pn8/z, [x0] // 10100000-01000000-01000000-00000000 +// CHECK-INST: ld1w { z0.s, z1.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0x40,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0404000 + +ld1w {z20.s-z21.s}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-01010101-01010100 +// CHECK-INST: ld1w { z20.s, z21.s }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x55,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0455554 + +ld1w {z22.s-z23.s}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-01001101-10110110 +// CHECK-INST: ld1w { z22.s, z23.s }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x4d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0484db6 + +ld1w {z30.s-z31.s}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-01011111-11111110 +// CHECK-INST: ld1w { z30.s, z31.s }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x5f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f5ffe + +ld1w {z0.s-z3.s}, pn8/z, [x0, x0, lsl #2] // 10100000-00000000-11000000-00000000 +// CHECK-INST: ld1w { z0.s - z3.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0xc0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000c000 + +ld1w {z20.s-z23.s}, pn13/z, [x10, x21, lsl #2] // 10100000-00010101-11010101-01010100 +// CHECK-INST: ld1w { z20.s - z23.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x54,0xd5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015d554 + +ld1w {z20.s-z23.s}, pn11/z, [x13, x8, lsl #2] // 10100000-00001000-11001101-10110100 +// CHECK-INST: ld1w { z20.s - z23.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb4,0xcd,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008cdb4 + +ld1w {z28.s-z31.s}, pn15/z, [sp, 
xzr, lsl #2] // 10100000-00011111-11011111-11111100 +// CHECK-INST: ld1w { z28.s - z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfc,0xdf,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01fdffc + +ld1w {z0.s-z3.s}, pn8/z, [x0] // 10100000-01000000-11000000-00000000 +// CHECK-INST: ld1w { z0.s - z3.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x00,0xc0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a040c000 + +ld1w {z20.s-z23.s}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-11010101-01010100 +// CHECK-INST: ld1w { z20.s - z23.s }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xd5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045d554 + +ld1w {z20.s-z23.s}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-11001101-10110100 +// CHECK-INST: ld1w { z20.s - z23.s }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xcd,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048cdb4 + +ld1w {z28.s-z31.s}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-11011111-11111100 +// CHECK-INST: ld1w { z28.s - z31.s }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xdf,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04fdffc diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1b-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1b-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ldnt1b {z0.b-z2.b}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ldnt1b {z0.b-z2.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z1.b-z4.b}, pn8/z, 
[x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ldnt1b {z1.b-z4.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z7.b-z8.b}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ldnt1b {z7.b-z8.b}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ldnt1b {z0.b-z1.b}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1b {z0.b-z1.b}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z0.b-z1.b}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ldnt1b {z0.b-z1.b}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ldnt1b {z0.b-z3.b}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1b {z0.b-z3.b}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z0.b-z3.b}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1b 
{z0.b-z3.b}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1b {z0.b-z3.b}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1b {z0.b-z3.b}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1b.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ldnt1b {z0.b-z1.b}, pn8/z, [x0, x0] // 10100000-00000000-00000000-00000001 +// CHECK-INST: ldnt1b { z0.b, z1.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x01,0x00,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0000001 + +ldnt1b {z20.b-z21.b}, pn13/z, [x10, x21] // 10100000-00010101-00010101-01010101 +// CHECK-INST: ldnt1b { z20.b, z21.b }, pn13/z, [x10, x21] +// CHECK-ENCODING: [0x55,0x15,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0151555 + +ldnt1b {z22.b-z23.b}, 
pn11/z, [x13, x8] // 10100000-00001000-00001101-10110111 +// CHECK-INST: ldnt1b { z22.b, z23.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb7,0x0d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0080db7 + +ldnt1b {z30.b-z31.b}, pn15/z, [sp, xzr] // 10100000-00011111-00011111-11111111 +// CHECK-INST: ldnt1b { z30.b, z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xff,0x1f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f1fff + +ldnt1b {z0.b-z1.b}, pn8/z, [x0] // 10100000-01000000-00000000-00000001 +// CHECK-INST: ldnt1b { z0.b, z1.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0x00,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0400001 + +ldnt1b {z20.b-z21.b}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-00010101-01010101 +// CHECK-INST: ldnt1b { z20.b, z21.b }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x15,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0451555 + +ldnt1b {z22.b-z23.b}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-00001101-10110111 +// CHECK-INST: ldnt1b { z22.b, z23.b }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x0d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0480db7 + +ldnt1b {z30.b-z31.b}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-00011111-11111111 +// CHECK-INST: ldnt1b { z30.b, z31.b }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x1f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f1fff + +ldnt1b {z0.b-z3.b}, pn8/z, [x0, x0] // 10100000-00000000-10000000-00000001 +// CHECK-INST: ldnt1b { z0.b - z3.b }, pn8/z, [x0, x0] +// CHECK-ENCODING: [0x01,0x80,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0008001 + +ldnt1b {z20.b-z23.b}, pn13/z, [x10, x21] // 10100000-00010101-10010101-01010101 +// CHECK-INST: ldnt1b { z20.b - z23.b }, pn13/z, [x10, 
x21] +// CHECK-ENCODING: [0x55,0x95,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0159555 + +ldnt1b {z20.b-z23.b}, pn11/z, [x13, x8] // 10100000-00001000-10001101-10110101 +// CHECK-INST: ldnt1b { z20.b - z23.b }, pn11/z, [x13, x8] +// CHECK-ENCODING: [0xb5,0x8d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0088db5 + +ldnt1b {z28.b-z31.b}, pn15/z, [sp, xzr] // 10100000-00011111-10011111-11111101 +// CHECK-INST: ldnt1b { z28.b - z31.b }, pn15/z, [sp, xzr] +// CHECK-ENCODING: [0xfd,0x9f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f9ffd + +ldnt1b {z0.b-z3.b}, pn8/z, [x0] // 10100000-01000000-10000000-00000001 +// CHECK-INST: ldnt1b { z0.b - z3.b }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0x80,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0408001 + +ldnt1b {z20.b-z23.b}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-10010101-01010101 +// CHECK-INST: ldnt1b { z20.b - z23.b }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0x95,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0459555 + +ldnt1b {z20.b-z23.b}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-10001101-10110101 +// CHECK-INST: ldnt1b { z20.b - z23.b }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0x8d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0488db5 + +ldnt1b {z28.b-z31.b}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-10011111-11111101 +// CHECK-INST: ldnt1b { z28.b - z31.b }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0x9f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f9ffd diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1d-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1d-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1d-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ldnt1d {z0.d-z2.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ldnt1d {z0.d-z2.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z1.d-z4.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ldnt1d {z1.d-z4.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z7.d-z8.d}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ldnt1d {z7.d-z8.d}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ldnt1d {z0.d-z1.d}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1d {z0.d-z1.d}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z0.d-z1.d}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ldnt1d {z0.d-z1.d}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// 
--------------------------------------------------------------------------// +// Invalid immediate range + +ldnt1d {z0.d-z3.d}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1d {z0.d-z3.d}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z0.d-z3.d}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1d {z0.d-z3.d}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1d {z0.d-z3.d}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1d {z0.d-z3.d}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1d.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ldnt1d {z0.d-z1.d}, pn8/z, [x0, x0, lsl 
#3] // 10100000-00000000-01100000-00000001 +// CHECK-INST: ldnt1d { z0.d, z1.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x01,0x60,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0006001 + +ldnt1d {z20.d-z21.d}, pn13/z, [x10, x21, lsl #3] // 10100000-00010101-01110101-01010101 +// CHECK-INST: ldnt1d { z20.d, z21.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x75,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0157555 + +ldnt1d {z22.d-z23.d}, pn11/z, [x13, x8, lsl #3] // 10100000-00001000-01101101-10110111 +// CHECK-INST: ldnt1d { z22.d, z23.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x6d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0086db7 + +ldnt1d {z30.d-z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100000-00011111-01111111-11111111 +// CHECK-INST: ldnt1d { z30.d, z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xff,0x7f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f7fff + +ldnt1d {z0.d-z1.d}, pn8/z, [x0] // 10100000-01000000-01100000-00000001 +// CHECK-INST: ldnt1d { z0.d, z1.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0x60,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0406001 + +ldnt1d {z20.d-z21.d}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-01110101-01010101 +// CHECK-INST: ldnt1d { z20.d, z21.d }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x75,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0457555 + +ldnt1d {z22.d-z23.d}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-01101101-10110111 +// CHECK-INST: ldnt1d { z22.d, z23.d }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x6d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0486db7 + +ldnt1d {z30.d-z31.d}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-01111111-11111111 +// CHECK-INST: 
ldnt1d { z30.d, z31.d }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x7f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f7fff + +ldnt1d {z0.d-z3.d}, pn8/z, [x0, x0, lsl #3] // 10100000-00000000-11100000-00000001 +// CHECK-INST: ldnt1d { z0.d - z3.d }, pn8/z, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x01,0xe0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000e001 + +ldnt1d {z20.d-z23.d}, pn13/z, [x10, x21, lsl #3] // 10100000-00010101-11110101-01010101 +// CHECK-INST: ldnt1d { z20.d - z23.d }, pn13/z, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0xf5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015f555 + +ldnt1d {z20.d-z23.d}, pn11/z, [x13, x8, lsl #3] // 10100000-00001000-11101101-10110101 +// CHECK-INST: ldnt1d { z20.d - z23.d }, pn11/z, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb5,0xed,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008edb5 + +ldnt1d {z28.d-z31.d}, pn15/z, [sp, xzr, lsl #3] // 10100000-00011111-11111111-11111101 +// CHECK-INST: ldnt1d { z28.d - z31.d }, pn15/z, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfd,0xff,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01ffffd + +ldnt1d {z0.d-z3.d}, pn8/z, [x0] // 10100000-01000000-11100000-00000001 +// CHECK-INST: ldnt1d { z0.d - z3.d }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0xe0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a040e001 + +ldnt1d {z20.d-z23.d}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-11110101-01010101 +// CHECK-INST: ldnt1d { z20.d - z23.d }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045f555 + +ldnt1d {z20.d-z23.d}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-11101101-10110101 +// CHECK-INST: ldnt1d { z20.d - z23.d }, pn11/z, [x13, #-32, mul vl] +// 
CHECK-ENCODING: [0xb5,0xed,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048edb5 + +ldnt1d {z28.d-z31.d}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-11111111-11111101 +// CHECK-INST: ldnt1d { z28.d - z31.d }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0xff,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04ffffd diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1h-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1h-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ldnt1h {z0.h-z2.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ldnt1h {z0.h-z2.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z1.h-z4.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ldnt1h {z1.h-z4.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z7.h-z8.h}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ldnt1h {z7.h-z8.h}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ldnt1h {z0.h-z1.h}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: 
ldnt1h {z0.h-z1.h}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z0.h-z1.h}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ldnt1h {z0.h-z1.h}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ldnt1h {z0.h-z3.h}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1h {z0.h-z3.h}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z0.h-z3.h}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1h {z0.h-z3.h}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1h {z0.h-z3.h}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1h {z0.h-z3.h}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1h.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 
-filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ldnt1h {z0.h-z1.h}, pn8/z, [x0, x0, lsl #1] // 10100000-00000000-00100000-00000001 +// CHECK-INST: ldnt1h { z0.h, z1.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x01,0x20,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0002001 + +ldnt1h {z20.h-z21.h}, pn13/z, [x10, x21, lsl #1] // 10100000-00010101-00110101-01010101 +// CHECK-INST: ldnt1h { z20.h, z21.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0x35,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0153555 + +ldnt1h {z22.h-z23.h}, pn11/z, [x13, x8, lsl #1] // 10100000-00001000-00101101-10110111 +// CHECK-INST: ldnt1h { z22.h, z23.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb7,0x2d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0082db7 + +ldnt1h {z30.h-z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100000-00011111-00111111-11111111 +// CHECK-INST: ldnt1h { z30.h, z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xff,0x3f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f3fff + +ldnt1h {z0.h-z1.h}, pn8/z, [x0] // 10100000-01000000-00100000-00000001 +// CHECK-INST: ldnt1h { z0.h, z1.h }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0x20,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0402001 + +ldnt1h {z20.h-z21.h}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-00110101-01010101 +// CHECK-INST: 
ldnt1h { z20.h, z21.h }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0453555 + +ldnt1h {z22.h-z23.h}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-00101101-10110111 +// CHECK-INST: ldnt1h { z22.h, z23.h }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0482db7 + +ldnt1h {z30.h-z31.h}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-00111111-11111111 +// CHECK-INST: ldnt1h { z30.h, z31.h }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f3fff + +ldnt1h {z0.h-z3.h}, pn8/z, [x0, x0, lsl #1] // 10100000-00000000-10100000-00000001 +// CHECK-INST: ldnt1h { z0.h - z3.h }, pn8/z, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x01,0xa0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000a001 + +ldnt1h {z20.h-z23.h}, pn13/z, [x10, x21, lsl #1] // 10100000-00010101-10110101-01010101 +// CHECK-INST: ldnt1h { z20.h - z23.h }, pn13/z, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0xb5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015b555 + +ldnt1h {z20.h-z23.h}, pn11/z, [x13, x8, lsl #1] // 10100000-00001000-10101101-10110101 +// CHECK-INST: ldnt1h { z20.h - z23.h }, pn11/z, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb5,0xad,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008adb5 + +ldnt1h {z28.h-z31.h}, pn15/z, [sp, xzr, lsl #1] // 10100000-00011111-10111111-11111101 +// CHECK-INST: ldnt1h { z28.h - z31.h }, pn15/z, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfd,0xbf,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01fbffd + +ldnt1h {z0.h-z3.h}, pn8/z, [x0] // 10100000-01000000-10100000-00000001 +// CHECK-INST: ldnt1h { z0.h - z3.h }, pn8/z, [x0] +// 
CHECK-ENCODING: [0x01,0xa0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a040a001 + +ldnt1h {z20.h-z23.h}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-10110101-01010101 +// CHECK-INST: ldnt1h { z20.h - z23.h }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xb5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045b555 + +ldnt1h {z20.h-z23.h}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-10101101-10110101 +// CHECK-INST: ldnt1h { z20.h - z23.h }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0xad,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048adb5 + +ldnt1h {z28.h-z31.h}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-10111111-11111101 +// CHECK-INST: ldnt1h { z28.h - z31.h }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0xbf,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04fbffd diff --git a/llvm/test/MC/AArch64/SVE2p1/ldnt1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1w-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1w-diagnostics.s @@ -0,0 +1,55 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +ldnt1w {z0.s-z2.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ldnt1w {z0.s-z2.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z1.s-z4.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: ldnt1w {z1.s-z4.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z7.s-z8.s}, pn8/z, [x0, x0, lsl #3] +// CHECK: 
[[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: ldnt1w {z7.s-z8.s}, pn8/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +ldnt1w {z0.s-z1.s}, pn7/z, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1w {z0.s-z1.s}, pn7/z, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z0.s-z1.s}, pn8/m, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: expecting 'z' predication +// CHECK-NEXT: ldnt1w {z0.s-z1.s}, pn8/m, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: ldnt1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +ldnt1w {z0.s-z3.s}, pn8/z, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1w {z0.s-z3.s}, pn8/z, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z0.s-z3.s}, pn8/z, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1w {z0.s-z3.s}, pn8/z, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +ldnt1w {z0.s-z3.s}, pn8/z, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: ldnt1w {z0.s-z3.s}, pn8/z, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git 
a/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ldnt1w.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ldnt1w {z0.s-z1.s}, pn8/z, [x0, x0, lsl #2] // 10100000-00000000-01000000-00000001 +// CHECK-INST: ldnt1w { z0.s, z1.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x01,0x40,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0004001 + +ldnt1w {z20.s-z21.s}, pn13/z, [x10, x21, lsl #2] // 10100000-00010101-01010101-01010101 +// CHECK-INST: ldnt1w { z20.s, z21.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0155555 + +ldnt1w {z22.s-z23.s}, pn11/z, [x13, x8, lsl #2] // 10100000-00001000-01001101-10110111 +// CHECK-INST: ldnt1w { z22.s, z23.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0084db7 + +ldnt1w {z30.s-z31.s}, pn15/z, [sp, xzr, 
lsl #2] // 10100000-00011111-01011111-11111111 +// CHECK-INST: ldnt1w { z30.s, z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xff,0x5f,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01f5fff + +ldnt1w {z0.s-z1.s}, pn8/z, [x0] // 10100000-01000000-01000000-00000001 +// CHECK-INST: ldnt1w { z0.s, z1.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0x40,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0404001 + +ldnt1w {z20.s-z21.s}, pn13/z, [x10, #10, mul vl] // 10100000-01000101-01010101-01010101 +// CHECK-INST: ldnt1w { z20.s, z21.s }, pn13/z, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x55,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0455555 + +ldnt1w {z22.s-z23.s}, pn11/z, [x13, #-16, mul vl] // 10100000-01001000-01001101-10110111 +// CHECK-INST: ldnt1w { z22.s, z23.s }, pn11/z, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x4d,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0484db7 + +ldnt1w {z30.s-z31.s}, pn15/z, [sp, #-2, mul vl] // 10100000-01001111-01011111-11111111 +// CHECK-INST: ldnt1w { z30.s, z31.s }, pn15/z, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x5f,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04f5fff + +ldnt1w {z0.s-z3.s}, pn8/z, [x0, x0, lsl #2] // 10100000-00000000-11000000-00000001 +// CHECK-INST: ldnt1w { z0.s - z3.s }, pn8/z, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x01,0xc0,0x00,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a000c001 + +ldnt1w {z20.s-z23.s}, pn13/z, [x10, x21, lsl #2] // 10100000-00010101-11010101-01010101 +// CHECK-INST: ldnt1w { z20.s - z23.s }, pn13/z, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0xd5,0x15,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a015d555 + +ldnt1w {z20.s-z23.s}, pn11/z, [x13, x8, lsl #2] // 10100000-00001000-11001101-10110101 +// CHECK-INST: 
ldnt1w { z20.s - z23.s }, pn11/z, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb5,0xcd,0x08,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a008cdb5 + +ldnt1w {z28.s-z31.s}, pn15/z, [sp, xzr, lsl #2] // 10100000-00011111-11011111-11111101 +// CHECK-INST: ldnt1w { z28.s - z31.s }, pn15/z, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfd,0xdf,0x1f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a01fdffd + +ldnt1w {z0.s-z3.s}, pn8/z, [x0] // 10100000-01000000-11000000-00000001 +// CHECK-INST: ldnt1w { z0.s - z3.s }, pn8/z, [x0] +// CHECK-ENCODING: [0x01,0xc0,0x40,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a040c001 + +ldnt1w {z20.s-z23.s}, pn13/z, [x10, #20, mul vl] // 10100000-01000101-11010101-01010101 +// CHECK-INST: ldnt1w { z20.s - z23.s }, pn13/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xd5,0x45,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a045d555 + +ldnt1w {z20.s-z23.s}, pn11/z, [x13, #-32, mul vl] // 10100000-01001000-11001101-10110101 +// CHECK-INST: ldnt1w { z20.s - z23.s }, pn11/z, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0xcd,0x48,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a048cdb5 + +ldnt1w {z28.s-z31.s}, pn15/z, [sp, #-4, mul vl] // 10100000-01001111-11011111-11111101 +// CHECK-INST: ldnt1w { z28.s - z31.s }, pn15/z, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0xdf,0x4f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a04fdffd diff --git a/llvm/test/MC/AArch64/SVE2p1/pext-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/pext-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pext-diagnostics.s @@ -0,0 +1,22 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid predicate as counter register + +pext 
p0.h, pn3[0] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: pext p0.h, pn3[0] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid lane index + +pext p0.d, pn8[4] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: pext p0.d, pn8[4] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +pext p0.b, pn8[-1] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: vector lane must be an integer in range [0, 3]. +// CHECK-NEXT: pext p0.b, pn8[-1] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/pext.s b/llvm/test/MC/AArch64/SVE2p1/pext.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/pext.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +pext p0.h, pn8[0] // 00100101-01100000-01110000-00010000 +// CHECK-INST: pext p0.h, pn8[0] +// CHECK-ENCODING: [0x10,0x70,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// 
CHECK-UNKNOWN: 25607010 + +pext p5.h, pn10[1] // 00100101-01100000-01110001-01010101 +// CHECK-INST: pext p5.h, pn10[1] +// CHECK-ENCODING: [0x55,0x71,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25607155 + +pext p7.h, pn13[1] // 00100101-01100000-01110001-10110111 +// CHECK-INST: pext p7.h, pn13[1] +// CHECK-ENCODING: [0xb7,0x71,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 256071b7 + +pext p15.h, pn15[3] // 00100101-01100000-01110011-11111111 +// CHECK-INST: pext p15.h, pn15[3] +// CHECK-ENCODING: [0xff,0x73,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 256073ff + +pext p0.s, pn8[0] // 00100101-10100000-01110000-00010000 +// CHECK-INST: pext p0.s, pn8[0] +// CHECK-ENCODING: [0x10,0x70,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07010 + +pext p5.s, pn10[1] // 00100101-10100000-01110001-01010101 +// CHECK-INST: pext p5.s, pn10[1] +// CHECK-ENCODING: [0x55,0x71,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07155 + +pext p7.s, pn13[1] // 00100101-10100000-01110001-10110111 +// CHECK-INST: pext p7.s, pn13[1] +// CHECK-ENCODING: [0xb7,0x71,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a071b7 + +pext p15.s, pn15[3] // 00100101-10100000-01110011-11111111 +// CHECK-INST: pext p15.s, pn15[3] +// CHECK-ENCODING: [0xff,0x73,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a073ff + +pext p0.d, pn8[0] // 00100101-11100000-01110000-00010000 +// CHECK-INST: pext p0.d, pn8[0] +// CHECK-ENCODING: [0x10,0x70,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e07010 + +pext p5.d, pn10[1] // 00100101-11100000-01110001-01010101 +// CHECK-INST: pext p5.d, pn10[1] +// CHECK-ENCODING: [0x55,0x71,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 
25e07155 + +pext p7.d, pn13[1] // 00100101-11100000-01110001-10110111 +// CHECK-INST: pext p7.d, pn13[1] +// CHECK-ENCODING: [0xb7,0x71,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e071b7 + +pext p15.d, pn15[3] // 00100101-11100000-01110011-11111111 +// CHECK-INST: pext p15.d, pn15[3] +// CHECK-ENCODING: [0xff,0x73,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e073ff + +pext p0.b, pn8[0] // 00100101-00100000-01110000-00010000 +// CHECK-INST: pext p0.b, pn8[0] +// CHECK-ENCODING: [0x10,0x70,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207010 + +pext p5.b, pn10[1] // 00100101-00100000-01110001-01010101 +// CHECK-INST: pext p5.b, pn10[1] +// CHECK-ENCODING: [0x55,0x71,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207155 + +pext p7.b, pn13[1] // 00100101-00100000-01110001-10110111 +// CHECK-INST: pext p7.b, pn13[1] +// CHECK-ENCODING: [0xb7,0x71,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 252071b7 + +pext p15.b, pn15[3] // 00100101-00100000-01110011-11111111 +// CHECK-INST: pext p15.b, pn15[3] +// CHECK-ENCODING: [0xff,0x73,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 252073ff diff --git a/llvm/test/MC/AArch64/SVE2p1/ptrue-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/ptrue-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ptrue-diagnostics.s @@ -0,0 +1,26 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid use of predicate as counter register + +ptrue pn7.b, vlx1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid predicate register, expected PN in range pn8..pn15 with element suffix. 
+// CHECK-NEXT: ptrue pn7.b, vlx1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid SME pattern. + +ptrue pn8.b, vlx0 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: ptrue pn8.b, vlx0 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid use of predicate as counter without suffix + +ptrue pn8, vlx1 +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid predicate register, expected PN in range pn8..pn15 with element suffix. +// CHECK-NEXT: ptrue pn8, vlx1 +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + diff --git a/llvm/test/MC/AArch64/SVE2p1/ptrue.s b/llvm/test/MC/AArch64/SVE2p1/ptrue.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/ptrue.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +ptrue pn8.h // 00100101-01100000-01111000-00010000 +// CHECK-INST: ptrue pn8.h +// CHECK-ENCODING: [0x10,0x78,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or 
sve2p1 +// CHECK-UNKNOWN: 25607810 + +ptrue pn13.h // 00100101-01100000-01111000-00010101 +// CHECK-INST: ptrue pn13.h +// CHECK-ENCODING: [0x15,0x78,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25607815 + +ptrue pn15.h // 00100101-01100000-01111000-00010111 +// CHECK-INST: ptrue pn15.h +// CHECK-ENCODING: [0x17,0x78,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25607817 + +ptrue pn9.h // 00100101-01100000-01111000-00010001 +// CHECK-INST: ptrue pn9.h +// CHECK-ENCODING: [0x11,0x78,0x60,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25607811 + +ptrue pn8.s // 00100101-10100000-01111000-00010000 +// CHECK-INST: ptrue pn8.s +// CHECK-ENCODING: [0x10,0x78,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07810 + +ptrue pn13.s // 00100101-10100000-01111000-00010101 +// CHECK-INST: ptrue pn13.s +// CHECK-ENCODING: [0x15,0x78,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07815 + +ptrue pn15.s // 00100101-10100000-01111000-00010111 +// CHECK-INST: ptrue pn15.s +// CHECK-ENCODING: [0x17,0x78,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07817 + +ptrue pn9.s // 00100101-10100000-01111000-00010001 +// CHECK-INST: ptrue pn9.s +// CHECK-ENCODING: [0x11,0x78,0xa0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25a07811 + +ptrue pn8.d // 00100101-11100000-01111000-00010000 +// CHECK-INST: ptrue pn8.d +// CHECK-ENCODING: [0x10,0x78,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e07810 + +ptrue pn13.d // 00100101-11100000-01111000-00010101 +// CHECK-INST: ptrue pn13.d +// CHECK-ENCODING: [0x15,0x78,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e07815 + +ptrue pn15.d // 00100101-11100000-01111000-00010111 +// CHECK-INST: ptrue pn15.d +// 
CHECK-ENCODING: [0x17,0x78,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e07817 + +ptrue pn9.d // 00100101-11100000-01111000-00010001 +// CHECK-INST: ptrue pn9.d +// CHECK-ENCODING: [0x11,0x78,0xe0,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25e07811 + +ptrue pn8.b // 00100101-00100000-01111000-00010000 +// CHECK-INST: ptrue pn8.b +// CHECK-ENCODING: [0x10,0x78,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207810 + +ptrue pn13.b // 00100101-00100000-01111000-00010101 +// CHECK-INST: ptrue pn13.b +// CHECK-ENCODING: [0x15,0x78,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207815 + +ptrue pn15.b // 00100101-00100000-01111000-00010111 +// CHECK-INST: ptrue pn15.b +// CHECK-ENCODING: [0x17,0x78,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207817 + +ptrue pn9.b // 00100101-00100000-01111000-00010001 +// CHECK-INST: ptrue pn9.b +// CHECK-ENCODING: [0x11,0x78,0x20,0x25] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 25207811 diff --git a/llvm/test/MC/AArch64/SVE2p1/st1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1b-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1b-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +st1b {z0.b-z2.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1b {z0.b-z2.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1b {z1.b-z4.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element 
types +// CHECK-NEXT: st1b {z1.b-z4.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1b {z7.b-z8.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: st1b {z7.b-z8.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +st1b {z0.b-z1.b}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1b {z0.b-z1.b}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1b {z0.b-z3.b}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1b {z0.b-z3.b}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1b {z0.b-z3.b}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1b {z0.b-z3.b}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1b {z0.b-z3.b}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1b {z0.b-z3.b}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1b.s b/llvm/test/MC/AArch64/SVE2p1/st1b.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SVE2p1/st1b.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +st1b {z0.b-z1.b}, pn8, [x0, x0] // 10100000-00100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z1.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x00,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0200000 + +st1b {z20.b-z21.b}, pn13, [x10, x21] // 10100000-00110101-00010101-01010100 +// CHECK-INST: st1b { z20.b, z21.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x54,0x15,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0351554 + +st1b {z22.b-z23.b}, pn11, [x13, x8] // 10100000-00101000-00001101-10110110 +// CHECK-INST: st1b { z22.b, z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb6,0x0d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0280db6 + +st1b {z30.b-z31.b}, pn15, [sp, xzr] // 10100000-00111111-00011111-11111110 +// CHECK-INST: st1b { z30.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xfe,0x1f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// 
CHECK-UNKNOWN: a03f1ffe + +st1b {z0.b-z1.b}, pn8, [x0] // 10100000-01100000-00000000-00000000 +// CHECK-INST: st1b { z0.b, z1.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x00,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0600000 + +st1b {z20.b-z21.b}, pn13, [x10, #10, mul vl] // 10100000-01100101-00010101-01010100 +// CHECK-INST: st1b { z20.b, z21.b }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x15,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0651554 + +st1b {z22.b-z23.b}, pn11, [x13, #-16, mul vl] // 10100000-01101000-00001101-10110110 +// CHECK-INST: st1b { z22.b, z23.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x0d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0680db6 + +st1b {z30.b-z31.b}, pn15, [sp, #-2, mul vl] // 10100000-01101111-00011111-11111110 +// CHECK-INST: st1b { z30.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x1f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f1ffe + +st1b {z0.b-z3.b}, pn8, [x0, x0] // 10100000-00100000-10000000-00000000 +// CHECK-INST: st1b { z0.b - z3.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x00,0x80,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0208000 + +st1b {z20.b-z23.b}, pn13, [x10, x21] // 10100000-00110101-10010101-01010100 +// CHECK-INST: st1b { z20.b - z23.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x54,0x95,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0359554 + +st1b {z20.b-z23.b}, pn11, [x13, x8] // 10100000-00101000-10001101-10110100 +// CHECK-INST: st1b { z20.b - z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb4,0x8d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0288db4 + +st1b {z28.b-z31.b}, pn15, [sp, xzr] // 10100000-00111111-10011111-11111100 +// CHECK-INST: st1b { z28.b - z31.b }, pn15, [sp, xzr] +// 
CHECK-ENCODING: [0xfc,0x9f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f9ffc + +st1b {z0.b-z3.b}, pn8, [x0] // 10100000-01100000-10000000-00000000 +// CHECK-INST: st1b { z0.b - z3.b }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x80,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0608000 + +st1b {z20.b-z23.b}, pn13, [x10, #20, mul vl] // 10100000-01100101-10010101-01010100 +// CHECK-INST: st1b { z20.b - z23.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0x95,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0659554 + +st1b {z20.b-z23.b}, pn11, [x13, #-32, mul vl] // 10100000-01101000-10001101-10110100 +// CHECK-INST: st1b { z20.b - z23.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0x8d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0688db4 + +st1b {z28.b-z31.b}, pn15, [sp, #-4, mul vl] // 10100000-01101111-10011111-11111100 +// CHECK-INST: st1b { z28.b - z31.b }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0x9f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f9ffc diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1d-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +st1d {z0.d-z2.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1d {z0.d-z2.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z1.d-z4.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a 
multiple of 4 and with matching element types +// CHECK-NEXT: st1d {z1.d-z4.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z7.d-z8.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: st1d {z7.d-z8.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +st1d {z0.d-z1.d}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1d {z0.d-z1.d}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1d {z0.d-z3.d}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1d {z0.d-z3.d}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z0.d-z3.d}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1d {z0.d-z3.d}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1d {z0.d-z3.d}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1d {z0.d-z3.d}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1d.s b/llvm/test/MC/AArch64/SVE2p1/st1d.s new file mode 
100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1d.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +st1d {z0.d-z1.d}, pn8, [x0, x0, lsl #3] // 10100000-00100000-01100000-00000000 +// CHECK-INST: st1d { z0.d, z1.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0x60,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0206000 + +st1d {z20.d-z21.d}, pn13, [x10, x21, lsl #3] // 10100000-00110101-01110101-01010100 +// CHECK-INST: st1d { z20.d, z21.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x54,0x75,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0357554 + +st1d {z22.d-z23.d}, pn11, [x13, x8, lsl #3] // 10100000-00101000-01101101-10110110 +// CHECK-INST: st1d { z22.d, z23.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb6,0x6d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0286db6 + +st1d {z30.d-z31.d}, pn15, [sp, xzr, lsl #3] // 10100000-00111111-01111111-11111110 +// CHECK-INST: st1d { z30.d, z31.d }, pn15, [sp, xzr, lsl #3] +// 
CHECK-ENCODING: [0xfe,0x7f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f7ffe + +st1d {z0.d-z1.d}, pn8, [x0] // 10100000-01100000-01100000-00000000 +// CHECK-INST: st1d { z0.d, z1.d }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x60,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0606000 + +st1d {z20.d-z21.d}, pn13, [x10, #10, mul vl] // 10100000-01100101-01110101-01010100 +// CHECK-INST: st1d { z20.d, z21.d }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x75,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0657554 + +st1d {z22.d-z23.d}, pn11, [x13, #-16, mul vl] // 10100000-01101000-01101101-10110110 +// CHECK-INST: st1d { z22.d, z23.d }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x6d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0686db6 + +st1d {z30.d-z31.d}, pn15, [sp, #-2, mul vl] // 10100000-01101111-01111111-11111110 +// CHECK-INST: st1d { z30.d, z31.d }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x7f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f7ffe + +st1d {z0.d-z3.d}, pn8, [x0, x0, lsl #3] // 10100000-00100000-11100000-00000000 +// CHECK-INST: st1d { z0.d - z3.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x00,0xe0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020e000 + +st1d {z20.d-z23.d}, pn13, [x10, x21, lsl #3] // 10100000-00110101-11110101-01010100 +// CHECK-INST: st1d { z20.d - z23.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x54,0xf5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a035f554 + +st1d {z20.d-z23.d}, pn11, [x13, x8, lsl #3] // 10100000-00101000-11101101-10110100 +// CHECK-INST: st1d { z20.d - z23.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb4,0xed,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: 
a028edb4 + +st1d {z28.d-z31.d}, pn15, [sp, xzr, lsl #3] // 10100000-00111111-11111111-11111100 +// CHECK-INST: st1d { z28.d - z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfc,0xff,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03ffffc + +st1d {z0.d-z3.d}, pn8, [x0] // 10100000-01100000-11100000-00000000 +// CHECK-INST: st1d { z0.d - z3.d }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xe0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060e000 + +st1d {z20.d-z23.d}, pn13, [x10, #20, mul vl] // 10100000-01100101-11110101-01010100 +// CHECK-INST: st1d { z20.d - z23.d }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xf5,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065f554 + +st1d {z20.d-z23.d}, pn11, [x13, #-32, mul vl] // 10100000-01101000-11101101-10110100 +// CHECK-INST: st1d { z20.d - z23.d }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xed,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068edb4 + +st1d {z28.d-z31.d}, pn15, [sp, #-4, mul vl] // 10100000-01101111-11111111-11111100 +// CHECK-INST: st1d { z28.d - z31.d }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xff,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06ffffc diff --git a/llvm/test/MC/AArch64/SVE2p1/st1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1h-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1h-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +st1h {z0.h-z2.h}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: st1h {z0.h-z2.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1h {z1.h-z4.h}, 
pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: st1h {z1.h-z4.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1h {z7.h-z8.h}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: st1h {z7.h-z8.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +st1h {z0.h-z1.h}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1h {z0.h-z1.h}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1h {z0.h-z3.h}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1h {z0.h-z3.h}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1h {z0.h-z3.h}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1h {z0.h-z3.h}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1h {z0.h-z3.h}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1h {z0.h-z3.h}, pn8, 
[x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1h.s b/llvm/test/MC/AArch64/SVE2p1/st1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1h.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +st1h {z0.h-z1.h}, pn8, [x0, x0, lsl #1] // 10100000-00100000-00100000-00000000 +// CHECK-INST: st1h { z0.h, z1.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0x20,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0202000 + +st1h {z20.h-z21.h}, pn13, [x10, x21, lsl #1] // 10100000-00110101-00110101-01010100 +// CHECK-INST: st1h { z20.h, z21.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x54,0x35,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0353554 + +st1h {z22.h-z23.h}, pn11, [x13, x8, lsl #1] // 10100000-00101000-00101101-10110110 +// CHECK-INST: st1h { z22.h, z23.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb6,0x2d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0282db6 + 
+st1h {z30.h-z31.h}, pn15, [sp, xzr, lsl #1] // 10100000-00111111-00111111-11111110 +// CHECK-INST: st1h { z30.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfe,0x3f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f3ffe + +st1h {z0.h-z1.h}, pn8, [x0] // 10100000-01100000-00100000-00000000 +// CHECK-INST: st1h { z0.h, z1.h }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x20,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0602000 + +st1h {z20.h-z21.h}, pn13, [x10, #10, mul vl] // 10100000-01100101-00110101-01010100 +// CHECK-INST: st1h { z20.h, z21.h }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x35,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0653554 + +st1h {z22.h-z23.h}, pn11, [x13, #-16, mul vl] // 10100000-01101000-00101101-10110110 +// CHECK-INST: st1h { z22.h, z23.h }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x2d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0682db6 + +st1h {z30.h-z31.h}, pn15, [sp, #-2, mul vl] // 10100000-01101111-00111111-11111110 +// CHECK-INST: st1h { z30.h, z31.h }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x3f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f3ffe + +st1h {z0.h-z3.h}, pn8, [x0, x0, lsl #1] // 10100000-00100000-10100000-00000000 +// CHECK-INST: st1h { z0.h - z3.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x00,0xa0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020a000 + +st1h {z20.h-z23.h}, pn13, [x10, x21, lsl #1] // 10100000-00110101-10110101-01010100 +// CHECK-INST: st1h { z20.h - z23.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x54,0xb5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a035b554 + +st1h {z20.h-z23.h}, pn11, [x13, x8, lsl #1] // 10100000-00101000-10101101-10110100 +// CHECK-INST: st1h { z20.h - 
z23.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb4,0xad,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a028adb4 + +st1h {z28.h-z31.h}, pn15, [sp, xzr, lsl #1] // 10100000-00111111-10111111-11111100 +// CHECK-INST: st1h { z28.h - z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfc,0xbf,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03fbffc + +st1h {z0.h-z3.h}, pn8, [x0] // 10100000-01100000-10100000-00000000 +// CHECK-INST: st1h { z0.h - z3.h }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xa0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060a000 + +st1h {z20.h-z23.h}, pn13, [x10, #20, mul vl] // 10100000-01100101-10110101-01010100 +// CHECK-INST: st1h { z20.h - z23.h }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xb5,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065b554 + +st1h {z20.h-z23.h}, pn11, [x13, #-32, mul vl] // 10100000-01101000-10101101-10110100 +// CHECK-INST: st1h { z20.h - z23.h }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xad,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068adb4 + +st1h {z28.h-z31.h}, pn15, [sp, #-4, mul vl] // 10100000-01101111-10111111-11111100 +// CHECK-INST: st1h { z28.h - z31.h }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xbf,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06fbffc diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/st1w-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +st1w {z0.s-z2.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: 
invalid operand for instruction +// CHECK-NEXT: st1w {z0.s-z2.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z1.s-z4.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: st1w {z1.s-z4.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z7.s-z8.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: st1w {z7.s-z8.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +st1w {z0.s-z1.s}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1w {z0.s-z1.s}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: st1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +st1w {z0.s-z3.s}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1w {z0.s-z3.s}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z0.s-z3.s}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1w {z0.s-z3.s}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +st1w {z0.s-z3.s}, pn8, 
[x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: st1w {z0.s-z3.s}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/st1w.s b/llvm/test/MC/AArch64/SVE2p1/st1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/st1w.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +st1w {z0.s-z1.s}, pn8, [x0, x0, lsl #2] // 10100000-00100000-01000000-00000000 +// CHECK-INST: st1w { z0.s, z1.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0x40,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0204000 + +st1w {z20.s-z21.s}, pn13, [x10, x21, lsl #2] // 10100000-00110101-01010101-01010100 +// CHECK-INST: st1w { z20.s, z21.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x54,0x55,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0355554 + +st1w {z22.s-z23.s}, pn11, [x13, x8, lsl #2] // 10100000-00101000-01001101-10110110 +// CHECK-INST: st1w { z22.s, z23.s }, pn11, 
[x13, x8, lsl #2] +// CHECK-ENCODING: [0xb6,0x4d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0284db6 + +st1w {z30.s-z31.s}, pn15, [sp, xzr, lsl #2] // 10100000-00111111-01011111-11111110 +// CHECK-INST: st1w { z30.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfe,0x5f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f5ffe + +st1w {z0.s-z1.s}, pn8, [x0] // 10100000-01100000-01000000-00000000 +// CHECK-INST: st1w { z0.s, z1.s }, pn8, [x0] +// CHECK-ENCODING: [0x00,0x40,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0604000 + +st1w {z20.s-z21.s}, pn13, [x10, #10, mul vl] // 10100000-01100101-01010101-01010100 +// CHECK-INST: st1w { z20.s, z21.s }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x54,0x55,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0655554 + +st1w {z22.s-z23.s}, pn11, [x13, #-16, mul vl] // 10100000-01101000-01001101-10110110 +// CHECK-INST: st1w { z22.s, z23.s }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb6,0x4d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0684db6 + +st1w {z30.s-z31.s}, pn15, [sp, #-2, mul vl] // 10100000-01101111-01011111-11111110 +// CHECK-INST: st1w { z30.s, z31.s }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xfe,0x5f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f5ffe + +st1w {z0.s-z3.s}, pn8, [x0, x0, lsl #2] // 10100000-00100000-11000000-00000000 +// CHECK-INST: st1w { z0.s - z3.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x00,0xc0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020c000 + +st1w {z20.s-z23.s}, pn13, [x10, x21, lsl #2] // 10100000-00110101-11010101-01010100 +// CHECK-INST: st1w { z20.s - z23.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x54,0xd5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// 
CHECK-UNKNOWN: a035d554 + +st1w {z20.s-z23.s}, pn11, [x13, x8, lsl #2] // 10100000-00101000-11001101-10110100 +// CHECK-INST: st1w { z20.s - z23.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb4,0xcd,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a028cdb4 + +st1w {z28.s-z31.s}, pn15, [sp, xzr, lsl #2] // 10100000-00111111-11011111-11111100 +// CHECK-INST: st1w { z28.s - z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfc,0xdf,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03fdffc + +st1w {z0.s-z3.s}, pn8, [x0] // 10100000-01100000-11000000-00000000 +// CHECK-INST: st1w { z0.s - z3.s }, pn8, [x0] +// CHECK-ENCODING: [0x00,0xc0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060c000 + +st1w {z20.s-z23.s}, pn13, [x10, #20, mul vl] // 10100000-01100101-11010101-01010100 +// CHECK-INST: st1w { z20.s - z23.s }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x54,0xd5,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065d554 + +st1w {z20.s-z23.s}, pn11, [x13, #-32, mul vl] // 10100000-01101000-11001101-10110100 +// CHECK-INST: st1w { z20.s - z23.s }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb4,0xcd,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068cdb4 + +st1w {z28.s-z31.s}, pn15, [sp, #-4, mul vl] // 10100000-01101111-11011111-11111100 +// CHECK-INST: st1w { z28.s - z31.s }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfc,0xdf,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06fdffc diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1b-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/stnt1b-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1b-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// 
--------------------------------------------------------------------------// +// Invalid vector list + +stnt1b {z0.b-z2.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: stnt1b {z0.b-z2.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1b {z1.b-z4.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: stnt1b {z1.b-z4.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1b {z7.b-z8.b}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: stnt1b {z7.b-z8.b}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +stnt1b {z0.b-z1.b}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1b {z0.b-z1.b}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1b {z0.b-z1.b}, pn8.b, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +stnt1b {z0.b-z3.b}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1b {z0.b-z3.b}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1b {z0.b-z3.b}, pn8, [x0, #-36, mul vl] 
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1b {z0.b-z3.b}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1b {z0.b-z3.b}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1b {z0.b-z3.b}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1b.s b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1b.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +stnt1b {z0.b-z1.b}, pn8, [x0, x0] // 10100000-00100000-00000000-00000001 +// CHECK-INST: stnt1b { z0.b, z1.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x01,0x00,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0200001 + +stnt1b {z20.b-z21.b}, pn13, [x10, x21] // 10100000-00110101-00010101-01010101 +// CHECK-INST: stnt1b { z20.b, z21.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x55,0x15,0x35,0xa0] +// 
CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0351555 + +stnt1b {z22.b-z23.b}, pn11, [x13, x8] // 10100000-00101000-00001101-10110111 +// CHECK-INST: stnt1b { z22.b, z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb7,0x0d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0280db7 + +stnt1b {z30.b-z31.b}, pn15, [sp, xzr] // 10100000-00111111-00011111-11111111 +// CHECK-INST: stnt1b { z30.b, z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xff,0x1f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f1fff + +stnt1b {z0.b-z1.b}, pn8, [x0] // 10100000-01100000-00000000-00000001 +// CHECK-INST: stnt1b { z0.b, z1.b }, pn8, [x0] +// CHECK-ENCODING: [0x01,0x00,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0600001 + +stnt1b {z20.b-z21.b}, pn13, [x10, #10, mul vl] // 10100000-01100101-00010101-01010101 +// CHECK-INST: stnt1b { z20.b, z21.b }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x15,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0651555 + +stnt1b {z22.b-z23.b}, pn11, [x13, #-16, mul vl] // 10100000-01101000-00001101-10110111 +// CHECK-INST: stnt1b { z22.b, z23.b }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x0d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0680db7 + +stnt1b {z30.b-z31.b}, pn15, [sp, #-2, mul vl] // 10100000-01101111-00011111-11111111 +// CHECK-INST: stnt1b { z30.b, z31.b }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x1f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f1fff + +stnt1b {z0.b-z3.b}, pn8, [x0, x0] // 10100000-00100000-10000000-00000001 +// CHECK-INST: stnt1b { z0.b - z3.b }, pn8, [x0, x0] +// CHECK-ENCODING: [0x01,0x80,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0208001 + +stnt1b {z20.b-z23.b}, pn13, [x10, x21] // 
10100000-00110101-10010101-01010101 +// CHECK-INST: stnt1b { z20.b - z23.b }, pn13, [x10, x21] +// CHECK-ENCODING: [0x55,0x95,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0359555 + +stnt1b {z20.b-z23.b}, pn11, [x13, x8] // 10100000-00101000-10001101-10110101 +// CHECK-INST: stnt1b { z20.b - z23.b }, pn11, [x13, x8] +// CHECK-ENCODING: [0xb5,0x8d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0288db5 + +stnt1b {z28.b-z31.b}, pn15, [sp, xzr] // 10100000-00111111-10011111-11111101 +// CHECK-INST: stnt1b { z28.b - z31.b }, pn15, [sp, xzr] +// CHECK-ENCODING: [0xfd,0x9f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f9ffd + +stnt1b {z0.b-z3.b}, pn8, [x0] // 10100000-01100000-10000000-00000001 +// CHECK-INST: stnt1b { z0.b - z3.b }, pn8, [x0] +// CHECK-ENCODING: [0x01,0x80,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0608001 + +stnt1b {z20.b-z23.b}, pn13, [x10, #20, mul vl] // 10100000-01100101-10010101-01010101 +// CHECK-INST: stnt1b { z20.b - z23.b }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0x95,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0659555 + +stnt1b {z20.b-z23.b}, pn11, [x13, #-32, mul vl] // 10100000-01101000-10001101-10110101 +// CHECK-INST: stnt1b { z20.b - z23.b }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0x8d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0688db5 + +stnt1b {z28.b-z31.b}, pn15, [sp, #-4, mul vl] // 10100000-01101111-10011111-11111101 +// CHECK-INST: stnt1b { z28.b - z31.b }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0x9f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f9ffd diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1d-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/stnt1d-diagnostics.s new file mode 100644 --- /dev/null +++ 
b/llvm/test/MC/AArch64/SVE2p1/stnt1d-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +stnt1d {z0.d-z2.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: stnt1d {z0.d-z2.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1d {z1.d-z4.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: stnt1d {z1.d-z4.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1d {z7.d-z8.d}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: stnt1d {z7.d-z8.d}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +stnt1d {z0.d-z1.d}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1d {z0.d-z1.d}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1d {z0.d-z1.d}, pn8.d, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +stnt1d {z0.d-z3.d}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 
in range [-32, 28] +// CHECK-NEXT: stnt1d {z0.d-z3.d}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1d {z0.d-z3.d}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1d {z0.d-z3.d}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1d {z0.d-z3.d}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1d {z0.d-z3.d}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1d.s b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1d.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +stnt1d {z0.d-z1.d}, pn8, [x0, x0, lsl #3] // 10100000-00100000-01100000-00000001 +// CHECK-INST: stnt1d { z0.d, z1.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x01,0x60,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0206001 + +stnt1d 
{z20.d-z21.d}, pn13, [x10, x21, lsl #3] // 10100000-00110101-01110101-01010101 +// CHECK-INST: stnt1d { z20.d, z21.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0x75,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0357555 + +stnt1d {z22.d-z23.d}, pn11, [x13, x8, lsl #3] // 10100000-00101000-01101101-10110111 +// CHECK-INST: stnt1d { z22.d, z23.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb7,0x6d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0286db7 + +stnt1d {z30.d-z31.d}, pn15, [sp, xzr, lsl #3] // 10100000-00111111-01111111-11111111 +// CHECK-INST: stnt1d { z30.d, z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xff,0x7f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f7fff + +stnt1d {z0.d-z1.d}, pn8, [x0] // 10100000-01100000-01100000-00000001 +// CHECK-INST: stnt1d { z0.d, z1.d }, pn8, [x0] +// CHECK-ENCODING: [0x01,0x60,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0606001 + +stnt1d {z20.d-z21.d}, pn13, [x10, #10, mul vl] // 10100000-01100101-01110101-01010101 +// CHECK-INST: stnt1d { z20.d, z21.d }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x75,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0657555 + +stnt1d {z22.d-z23.d}, pn11, [x13, #-16, mul vl] // 10100000-01101000-01101101-10110111 +// CHECK-INST: stnt1d { z22.d, z23.d }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x6d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0686db7 + +stnt1d {z30.d-z31.d}, pn15, [sp, #-2, mul vl] // 10100000-01101111-01111111-11111111 +// CHECK-INST: stnt1d { z30.d, z31.d }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x7f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f7fff + +stnt1d {z0.d-z3.d}, pn8, [x0, x0, lsl #3] // 10100000-00100000-11100000-00000001 +// 
CHECK-INST: stnt1d { z0.d - z3.d }, pn8, [x0, x0, lsl #3] +// CHECK-ENCODING: [0x01,0xe0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020e001 + +stnt1d {z20.d-z23.d}, pn13, [x10, x21, lsl #3] // 10100000-00110101-11110101-01010101 +// CHECK-INST: stnt1d { z20.d - z23.d }, pn13, [x10, x21, lsl #3] +// CHECK-ENCODING: [0x55,0xf5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a035f555 + +stnt1d {z20.d-z23.d}, pn11, [x13, x8, lsl #3] // 10100000-00101000-11101101-10110101 +// CHECK-INST: stnt1d { z20.d - z23.d }, pn11, [x13, x8, lsl #3] +// CHECK-ENCODING: [0xb5,0xed,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a028edb5 + +stnt1d {z28.d-z31.d}, pn15, [sp, xzr, lsl #3] // 10100000-00111111-11111111-11111101 +// CHECK-INST: stnt1d { z28.d - z31.d }, pn15, [sp, xzr, lsl #3] +// CHECK-ENCODING: [0xfd,0xff,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03ffffd + +stnt1d {z0.d-z3.d}, pn8, [x0] // 10100000-01100000-11100000-00000001 +// CHECK-INST: stnt1d { z0.d - z3.d }, pn8, [x0] +// CHECK-ENCODING: [0x01,0xe0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060e001 + +stnt1d {z20.d-z23.d}, pn13, [x10, #20, mul vl] // 10100000-01100101-11110101-01010101 +// CHECK-INST: stnt1d { z20.d - z23.d }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xf5,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065f555 + +stnt1d {z20.d-z23.d}, pn11, [x13, #-32, mul vl] // 10100000-01101000-11101101-10110101 +// CHECK-INST: stnt1d { z20.d - z23.d }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0xed,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068edb5 + +stnt1d {z28.d-z31.d}, pn15, [sp, #-4, mul vl] // 10100000-01101111-11111111-11111101 +// CHECK-INST: stnt1d { z28.d - z31.d }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: 
[0xfd,0xff,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06ffffd diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1h-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/stnt1h-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1h-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +stnt1h {z0.h-z2.h}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: stnt1h {z0.h-z2.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1h {z1.h-z4.h}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: stnt1h {z1.h-z4.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1h {z7.h-z8.h}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: stnt1h {z7.h-z8.h}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +stnt1h {z0.h-z1.h}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1h {z0.h-z1.h}, pn7, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1h {z0.h-z1.h}, pn8.h, [x13, #-8, mul vl] +// 
CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +stnt1h {z0.h-z3.h}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1h {z0.h-z3.h}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1h {z0.h-z3.h}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1h {z0.h-z3.h}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1h {z0.h-z3.h}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1h {z0.h-z3.h}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1h.s b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1h.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +stnt1h 
{z0.h-z1.h}, pn8, [x0, x0, lsl #1] // 10100000-00100000-00100000-00000001 +// CHECK-INST: stnt1h { z0.h, z1.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x01,0x20,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0202001 + +stnt1h {z20.h-z21.h}, pn13, [x10, x21, lsl #1] // 10100000-00110101-00110101-01010101 +// CHECK-INST: stnt1h { z20.h, z21.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0x35,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0353555 + +stnt1h {z22.h-z23.h}, pn11, [x13, x8, lsl #1] // 10100000-00101000-00101101-10110111 +// CHECK-INST: stnt1h { z22.h, z23.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb7,0x2d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0282db7 + +stnt1h {z30.h-z31.h}, pn15, [sp, xzr, lsl #1] // 10100000-00111111-00111111-11111111 +// CHECK-INST: stnt1h { z30.h, z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xff,0x3f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f3fff + +stnt1h {z0.h-z1.h}, pn8, [x0] // 10100000-01100000-00100000-00000001 +// CHECK-INST: stnt1h { z0.h, z1.h }, pn8, [x0] +// CHECK-ENCODING: [0x01,0x20,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0602001 + +stnt1h {z20.h-z21.h}, pn13, [x10, #10, mul vl] // 10100000-01100101-00110101-01010101 +// CHECK-INST: stnt1h { z20.h, z21.h }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x35,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0653555 + +stnt1h {z22.h-z23.h}, pn11, [x13, #-16, mul vl] // 10100000-01101000-00101101-10110111 +// CHECK-INST: stnt1h { z22.h, z23.h }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x2d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0682db7 + +stnt1h {z30.h-z31.h}, pn15, [sp, #-2, mul vl] // 10100000-01101111-00111111-11111111 +// 
CHECK-INST: stnt1h { z30.h, z31.h }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x3f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f3fff + +stnt1h {z0.h-z3.h}, pn8, [x0, x0, lsl #1] // 10100000-00100000-10100000-00000001 +// CHECK-INST: stnt1h { z0.h - z3.h }, pn8, [x0, x0, lsl #1] +// CHECK-ENCODING: [0x01,0xa0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020a001 + +stnt1h {z20.h-z23.h}, pn13, [x10, x21, lsl #1] // 10100000-00110101-10110101-01010101 +// CHECK-INST: stnt1h { z20.h - z23.h }, pn13, [x10, x21, lsl #1] +// CHECK-ENCODING: [0x55,0xb5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a035b555 + +stnt1h {z20.h-z23.h}, pn11, [x13, x8, lsl #1] // 10100000-00101000-10101101-10110101 +// CHECK-INST: stnt1h { z20.h - z23.h }, pn11, [x13, x8, lsl #1] +// CHECK-ENCODING: [0xb5,0xad,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a028adb5 + +stnt1h {z28.h-z31.h}, pn15, [sp, xzr, lsl #1] // 10100000-00111111-10111111-11111101 +// CHECK-INST: stnt1h { z28.h - z31.h }, pn15, [sp, xzr, lsl #1] +// CHECK-ENCODING: [0xfd,0xbf,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03fbffd + +stnt1h {z0.h-z3.h}, pn8, [x0] // 10100000-01100000-10100000-00000001 +// CHECK-INST: stnt1h { z0.h - z3.h }, pn8, [x0] +// CHECK-ENCODING: [0x01,0xa0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060a001 + +stnt1h {z20.h-z23.h}, pn13, [x10, #20, mul vl] // 10100000-01100101-10110101-01010101 +// CHECK-INST: stnt1h { z20.h - z23.h }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xb5,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065b555 + +stnt1h {z20.h-z23.h}, pn11, [x13, #-32, mul vl] // 10100000-01101000-10101101-10110101 +// CHECK-INST: stnt1h { z20.h - z23.h }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: 
[0xb5,0xad,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068adb5 + +stnt1h {z28.h-z31.h}, pn15, [sp, #-4, mul vl] // 10100000-01101111-10111111-11111101 +// CHECK-INST: stnt1h { z28.h - z31.h }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0xbf,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06fbffd diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1w-diagnostics.s b/llvm/test/MC/AArch64/SVE2p1/stnt1w-diagnostics.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1w-diagnostics.s @@ -0,0 +1,50 @@ +// RUN: not llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 2>&1 < %s | FileCheck %s + +// --------------------------------------------------------------------------// +// Invalid vector list + +stnt1w {z0.s-z2.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// CHECK-NEXT: stnt1w {z0.s-z2.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1w {z1.s-z4.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 4 consecutive SVE vectors, where the first vector is a multiple of 4 and with matching element types +// CHECK-NEXT: stnt1w {z1.s-z4.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1w {z7.s-z8.s}, pn8, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: Invalid vector list, expected list with 2 consecutive SVE vectors, where the first vector is a multiple of 2 and with matching element types +// CHECK-NEXT: stnt1w {z7.s-z8.s}, pn8, [x0, x0, lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid predicate-as-counter register + +stnt1w {z0.s-z1.s}, pn7, [x0, x0, lsl #3] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1w {z0.s-z1.s}, pn7, [x0, x0, 
lsl #3] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid restricted predicate-as-counter register expected pn8..pn15 +// CHECK-NEXT: stnt1w {z0.s-z1.s}, pn8.s, [x13, #-8, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +// --------------------------------------------------------------------------// +// Invalid immediate range + +stnt1w {z0.s-z3.s}, pn8, [x0, #-9, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1w {z0.s-z3.s}, pn8, [x0, #-9, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1w {z0.s-z3.s}, pn8, [x0, #-36, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1w {z0.s-z3.s}, pn8, [x0, #-36, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: + +stnt1w {z0.s-z3.s}, pn8, [x0, #32, mul vl] +// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: index must be a multiple of 4 in range [-32, 28] +// CHECK-NEXT: stnt1w {z0.s-z3.s}, pn8, [x0, #32, mul vl] +// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}: diff --git a/llvm/test/MC/AArch64/SVE2p1/stnt1w.s b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/AArch64/SVE2p1/stnt1w.s @@ -0,0 +1,110 @@ +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+sve2p1 < %s \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST +// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \ +// RUN: | FileCheck %s --check-prefix=CHECK-ERROR +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d --mattr=+sme2 - | FileCheck %s --check-prefix=CHECK-INST +// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+sme2 < %s \ +// RUN: | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN +// RUN: 
llvm-mc -triple=aarch64 -show-encoding -mattr=+sme2 < %s \ +// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \ +// RUN: | llvm-mc -triple=aarch64 -mattr=+sme2 -disassemble -show-encoding \ +// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST + +stnt1w {z0.s-z1.s}, pn8, [x0, x0, lsl #2] // 10100000-00100000-01000000-00000001 +// CHECK-INST: stnt1w { z0.s, z1.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x01,0x40,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0204001 + +stnt1w {z20.s-z21.s}, pn13, [x10, x21, lsl #2] // 10100000-00110101-01010101-01010101 +// CHECK-INST: stnt1w { z20.s, z21.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0x55,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0355555 + +stnt1w {z22.s-z23.s}, pn11, [x13, x8, lsl #2] // 10100000-00101000-01001101-10110111 +// CHECK-INST: stnt1w { z22.s, z23.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb7,0x4d,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0284db7 + +stnt1w {z30.s-z31.s}, pn15, [sp, xzr, lsl #2] // 10100000-00111111-01011111-11111111 +// CHECK-INST: stnt1w { z30.s, z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xff,0x5f,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03f5fff + +stnt1w {z0.s-z1.s}, pn8, [x0] // 10100000-01100000-01000000-00000001 +// CHECK-INST: stnt1w { z0.s, z1.s }, pn8, [x0] +// CHECK-ENCODING: [0x01,0x40,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0604001 + +stnt1w {z20.s-z21.s}, pn13, [x10, #10, mul vl] // 10100000-01100101-01010101-01010101 +// CHECK-INST: stnt1w { z20.s, z21.s }, pn13, [x10, #10, mul vl] +// CHECK-ENCODING: [0x55,0x55,0x65,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0655555 + +stnt1w {z22.s-z23.s}, pn11, [x13, #-16, mul vl] // 10100000-01101000-01001101-10110111 +// CHECK-INST: stnt1w { 
z22.s, z23.s }, pn11, [x13, #-16, mul vl] +// CHECK-ENCODING: [0xb7,0x4d,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a0684db7 + +stnt1w {z30.s-z31.s}, pn15, [sp, #-2, mul vl] // 10100000-01101111-01011111-11111111 +// CHECK-INST: stnt1w { z30.s, z31.s }, pn15, [sp, #-2, mul vl] +// CHECK-ENCODING: [0xff,0x5f,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06f5fff + +stnt1w {z0.s-z3.s}, pn8, [x0, x0, lsl #2] // 10100000-00100000-11000000-00000001 +// CHECK-INST: stnt1w { z0.s - z3.s }, pn8, [x0, x0, lsl #2] +// CHECK-ENCODING: [0x01,0xc0,0x20,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a020c001 + +stnt1w {z20.s-z23.s}, pn13, [x10, x21, lsl #2] // 10100000-00110101-11010101-01010101 +// CHECK-INST: stnt1w { z20.s - z23.s }, pn13, [x10, x21, lsl #2] +// CHECK-ENCODING: [0x55,0xd5,0x35,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a035d555 + +stnt1w {z20.s-z23.s}, pn11, [x13, x8, lsl #2] // 10100000-00101000-11001101-10110101 +// CHECK-INST: stnt1w { z20.s - z23.s }, pn11, [x13, x8, lsl #2] +// CHECK-ENCODING: [0xb5,0xcd,0x28,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a028cdb5 + +stnt1w {z28.s-z31.s}, pn15, [sp, xzr, lsl #2] // 10100000-00111111-11011111-11111101 +// CHECK-INST: stnt1w { z28.s - z31.s }, pn15, [sp, xzr, lsl #2] +// CHECK-ENCODING: [0xfd,0xdf,0x3f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a03fdffd + +stnt1w {z0.s-z3.s}, pn8, [x0] // 10100000-01100000-11000000-00000001 +// CHECK-INST: stnt1w { z0.s - z3.s }, pn8, [x0] +// CHECK-ENCODING: [0x01,0xc0,0x60,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a060c001 + +stnt1w {z20.s-z23.s}, pn13, [x10, #20, mul vl] // 10100000-01100101-11010101-01010101 +// CHECK-INST: stnt1w { z20.s - z23.s }, pn13, [x10, #20, mul vl] +// CHECK-ENCODING: [0x55,0xd5,0x65,0xa0] +// 
CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a065d555 + +stnt1w {z20.s-z23.s}, pn11, [x13, #-32, mul vl] // 10100000-01101000-11001101-10110101 +// CHECK-INST: stnt1w { z20.s - z23.s }, pn11, [x13, #-32, mul vl] +// CHECK-ENCODING: [0xb5,0xcd,0x68,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a068cdb5 + +stnt1w {z28.s-z31.s}, pn15, [sp, #-4, mul vl] // 10100000-01101111-11011111-11111101 +// CHECK-INST: stnt1w { z28.s - z31.s }, pn15, [sp, #-4, mul vl] +// CHECK-ENCODING: [0xfd,0xdf,0x6f,0xa0] +// CHECK-ERROR: instruction requires: sme2 or sve2p1 +// CHECK-UNKNOWN: a06fdffd diff --git a/llvm/test/MC/AArch64/neon-diagnostics.s b/llvm/test/MC/AArch64/neon-diagnostics.s --- a/llvm/test/MC/AArch64/neon-diagnostics.s +++ b/llvm/test/MC/AArch64/neon-diagnostics.s @@ -3877,7 +3877,7 @@ ld1 {v1.8h-v1.8h}, [x0] ld1 {v15.8h-v17.4h}, [x15] ld1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -3907,7 +3907,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: ld2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: ld2 {v15.4h, v16.4h, v17.4h}, [x32] @@ -3930,7 +3930,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must have the same sequential stride // CHECK-ERROR: ld3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: mismatched register size suffix @@ -3948,7 +3948,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: ld4 {v15.8h, 
v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must have the same sequential stride // CHECK-ERROR: ld4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -3985,7 +3985,7 @@ st1 {v1.8h-v1.8h}, [x0] st1 {v15.8h-v17.4h}, [x15] st1 {v0.8b-v2.8b, [x0] -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st1 {v0.16b, v2.16b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors @@ -4015,7 +4015,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st2 {v15.8h, v16.4h}, [x15] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: invalid operand for instruction // CHECK-ERROR: st2 {v0.8b, v2.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid operand for instruction @@ -4039,7 +4039,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st3 {v0.8b, v1,8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must have the same sequential stride // CHECK-ERROR: st3 {v0.8b, v2.8b, v3.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: mismatched register size suffix @@ -4057,7 +4057,7 @@ // CHECK-ERROR: error: mismatched register size suffix // CHECK-ERROR: st4 {v15.8h, v16.8h, v17.4h, v18.8h}, [x15] // CHECK-ERROR: ^ -// CHECK-ERROR: error: registers must be sequential +// CHECK-ERROR: error: registers must have the same sequential stride // CHECK-ERROR: st4 {v0.8b, v2.8b, v3.8b, v4.8b}, [x0] // CHECK-ERROR: ^ // CHECK-ERROR: error: invalid number of vectors diff --git a/llvm/test/Transforms/Inline/AArch64/sme-streaming-attr.ll b/llvm/test/Transforms/Inline/AArch64/sme-streaming-attr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/sme-streaming-attr.ll 
@@ -0,0 +1,238 @@ +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -inline | FileCheck %s + +declare void @inlined_streaming_body() "aarch64_pstate_sm_enabled"; +declare void @inlined_locally_streaming_body() "aarch64_pstate_sm_body"; +declare void @inlined_streaming_compatible_body() "aarch64_pstate_sm_compatible"; +declare void @inlined_streaming_compatible_locally_streaming_body() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body"; +declare void @inlined_normal_body(); + +define void @streaming_callee() "aarch64_pstate_sm_enabled" { +entry: + call void @inlined_streaming_body() + ret void +} + +define void @locally_streaming_callee() "aarch64_pstate_sm_body" { +entry: + call void @inlined_locally_streaming_body() + ret void +} + +define void @streaming_compatible_callee() "aarch64_pstate_sm_compatible" { +entry: + call void @inlined_streaming_compatible_body() + ret void +} + +define void @streaming_compatible_locally_streaming_callee() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +entry: + call void @inlined_streaming_compatible_locally_streaming_body() + ret void +} + +define void @normal_callee() { +entry: + call void @inlined_normal_body() + ret void +} + +define void @streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: @streaming_caller_streaming_callee_inline( +; CHECK: call void @inlined_streaming_body() +entry: + call void @streaming_callee() + ret void +} + +define void @streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: @streaming_caller_locally_streaming_callee_inline( +; CHECK: call void @inlined_locally_streaming_body() +entry: + call void @locally_streaming_callee() + ret void +} + +define void @streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: @streaming_caller_streaming_compatible_callee_inline( +; CHECK: call void @inlined_streaming_compatible_body() +entry: + call void 
@streaming_compatible_callee() + ret void +} + +define void @streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: @streaming_caller_streaming_compatible_locally_streaming_callee_inline( +; CHECK: call void @inlined_streaming_compatible_locally_streaming_body() +entry: + call void @streaming_compatible_locally_streaming_callee() + ret void +} + +define void @streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_enabled" { +; CHECK-LABEL: @streaming_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} + +define void @locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_body" { +; CHECK-LABEL: @locally_streaming_caller_streaming_callee_inline( +; CHECK: call void @inlined_streaming_body() +entry: + call void @streaming_callee() + ret void +} + +define void @locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_body" { +; CHECK-LABEL: @locally_streaming_caller_locally_streaming_callee_inline( +; CHECK: call void @inlined_locally_streaming_body() +entry: + call void @locally_streaming_callee() + ret void +} + +define void @locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_body" { +; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_callee_inline( +; CHECK: call void @inlined_streaming_compatible_body() +entry: + call void @streaming_compatible_callee() + ret void +} + +define void @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline() "aarch64_pstate_sm_body" { +; CHECK-LABEL: @locally_streaming_caller_streaming_compatible_locally_streaming_callee_inline( +; CHECK: call void @inlined_streaming_compatible_locally_streaming_body() +entry: + call void @streaming_compatible_locally_streaming_callee() + ret void +} + +define void @locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_body" { +; CHECK-LABEL: 
@locally_streaming_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} + +define void @streaming_compatible_caller_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: @streaming_compatible_caller_streaming_callee_dont_inline( +; CHECK: call void @streaming_callee() +entry: + call void @streaming_callee() + ret void +} + +define void @streaming_compatible_caller_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: @streaming_compatible_caller_locally_streaming_callee_dont_inline( +; CHECK: call void @locally_streaming_callee() +entry: + call void @locally_streaming_callee() + ret void +} + +define void @streaming_compatible_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_callee_inline( +; CHECK: call void @inlined_streaming_compatible_body() +entry: + call void @streaming_compatible_callee() + ret void +} + +define void @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: @streaming_compatible_caller_streaming_compatible_locally_streaming_callee_dont_inline( +; CHECK: call void @streaming_compatible_locally_streaming_callee() +entry: + call void @streaming_compatible_locally_streaming_callee() + ret void +} + +define void @streaming_compatible_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" { +; CHECK-LABEL: @streaming_compatible_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} + +define void @normal_caller_streaming_callee_dont_inline() { +; CHECK-LABEL: @normal_caller_streaming_callee_dont_inline( +; CHECK: call void @streaming_callee() +entry: + call void @streaming_callee() + ret void +} + +define void @normal_caller_locally_streaming_callee_dont_inline() { +; 
CHECK-LABEL: @normal_caller_locally_streaming_callee_dont_inline( +; CHECK: call void @locally_streaming_callee() +entry: + call void @locally_streaming_callee() + ret void +} + +define void @normal_caller_streaming_compatible_callee_inline() { +; CHECK-LABEL: @normal_caller_streaming_compatible_callee_inline( +; CHECK: call void @inlined_streaming_compatible_body() +entry: + call void @streaming_compatible_callee() + ret void +} + +define void @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline() { +; CHECK-LABEL: @normal_caller_streaming_compatible_locally_streaming_callee_dont_inline( +; CHECK: call void @streaming_compatible_locally_streaming_callee() +entry: + call void @streaming_compatible_locally_streaming_callee() + ret void +} + +define void @normal_caller_normal_callee_inline() { +; CHECK-LABEL: @normal_caller_normal_callee_inline( +; CHECK: call void @inlined_normal_body() +entry: + call void @normal_callee() + ret void +} + + +define void @streaming_compatible_locally_streaming_caller_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_callee_inline( +; CHECK: call void @inlined_streaming_body() +entry: + call void @streaming_callee() + ret void +} + +define void @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_locally_streaming_callee_inline( +; CHECK: call void @inlined_locally_streaming_body() +entry: + call void @locally_streaming_callee() + ret void +} + +define void @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_streaming_compatible_callee_inline( +; CHECK: call void @inlined_streaming_compatible_body() +entry: + call 
void @streaming_compatible_callee() + ret void +} + +define void @streaming_compatible_locally_streaming_caller_and_callee_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_and_callee_inline( +; CHECK: call void @inlined_streaming_compatible_locally_streaming_body() +entry: + call void @streaming_compatible_locally_streaming_callee() + ret void +} + +define void @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" { +; CHECK-LABEL: @streaming_compatible_locally_streaming_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} diff --git a/llvm/test/Transforms/Inline/AArch64/sme-za-attr.ll b/llvm/test/Transforms/Inline/AArch64/sme-za-attr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/sme-za-attr.ll @@ -0,0 +1,86 @@ +; RUN: opt -mtriple=aarch64-unknown-linux-gnu -mattr=+sme -S -inline < %s | FileCheck %s + +declare void @inlined_normal_body(); +declare void @inlined_shared_za_body() "aarch64_pstate_za_shared"; +declare void @inlined_new_za_body() "aarch64_pstate_za_new"; + +define void @normal_callee() { +entry: + call void @inlined_normal_body() + ret void +} + +define void @shared_za_callee() "aarch64_pstate_za_shared" { +entry: + call void @inlined_shared_za_body() + ret void +} + +define void @new_za_callee() "aarch64_pstate_za_new" { + call void @inlined_new_za_body() + ret void +} + +define void @normal_caller_normal_callee_inline() { +; CHECK-LABEL: @normal_caller_normal_callee_inline( +; CHECK: call void @inlined_normal_body() +entry: + call void @normal_callee() + ret void +} + +define void @normal_caller_new_za_callee_dont_inline() { +; CHECK-LABEL: @normal_caller_new_za_callee_dont_inline( +; CHECK: call void @new_za_callee() +entry: + call void @new_za_callee() + ret void +} + +define void 
@new_za_caller_normal_callee_dont_inline() "aarch64_pstate_za_new" { +; CHECK-LABEL: @new_za_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} + +define void @new_za_caller_shared_za_callee_inline() "aarch64_pstate_za_new" { +; CHECK-LABEL: @new_za_caller_shared_za_callee_inline( +; CHECK: call void @inlined_shared_za_body() +entry: + call void @shared_za_callee() + ret void +} + +define void @new_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_new" { +; CHECK-LABEL: @new_za_caller_new_za_callee_dont_inline( +; CHECK: call void @new_za_callee() +entry: + call void @new_za_callee() + ret void +} + +define void @shared_za_caller_normal_callee_dont_inline() "aarch64_pstate_za_shared" { +; CHECK-LABEL: @shared_za_caller_normal_callee_dont_inline( +; CHECK: call void @normal_callee() +entry: + call void @normal_callee() + ret void +} + +define void @shared_za_caller_shared_za_callee_inline() "aarch64_pstate_za_shared" { +; CHECK-LABEL: @shared_za_caller_shared_za_callee_inline( +; CHECK: call void @inlined_shared_za_body() +entry: + call void @shared_za_callee() + ret void +} + +define void @shared_za_caller_new_za_callee_dont_inline() "aarch64_pstate_za_shared" { +; CHECK-LABEL: @shared_za_caller_new_za_callee_dont_inline( +; CHECK: call void @new_za_callee() +entry: + call void @new_za_callee() + ret void +} diff --git a/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/sme-svcount.ll @@ -0,0 +1,20 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -instcombine -S < %s | FileCheck %s + +define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: ret aarch64_svcount [[VAL:%.*]] +; + %ptr = alloca aarch64_svcount, align 1 + store 
aarch64_svcount %val, ptr %ptr + %res = load aarch64_svcount, ptr %ptr + ret aarch64_svcount %res +} + +; Test that instcombine doesn't try to query the (scalable) size of aarch64_svcount +; in foldSelectInstWithICmp. +define aarch64_svcount @test_combine_on_select(aarch64_svcount %x, aarch64_svcount %y, i32 %k) { + %cmp = icmp sgt i32 %k, 42 + %x.y = select i1 %cmp, aarch64_svcount %x, aarch64_svcount %y + ret aarch64_svcount %x.y +} diff --git a/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SROA/aarch64-sme-svcount.ll @@ -0,0 +1,15 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -sroa -S < %s | FileCheck %s + +define aarch64_svcount @test_alloca_store_reload(aarch64_svcount %val) nounwind { +; CHECK-LABEL: @test_alloca_store_reload( +; CHECK-NEXT: [[PTR:%.*]] = alloca aarch64_svcount, align 1 +; CHECK-NEXT: store aarch64_svcount [[VAL:%.*]], ptr [[PTR]], align 2 +; CHECK-NEXT: [[RES:%.*]] = load aarch64_svcount, ptr [[PTR]], align 2 +; CHECK-NEXT: ret aarch64_svcount [[RES]] +; + %ptr = alloca aarch64_svcount, align 1 + store aarch64_svcount %val, ptr %ptr + %res = load aarch64_svcount, ptr %ptr + ret aarch64_svcount %res +} diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp --- a/llvm/utils/TableGen/CodeGenTarget.cpp +++ b/llvm/utils/TableGen/CodeGenTarget.cpp @@ -77,6 +77,7 @@ case MVT::ppcf128: return "MVT::ppcf128"; case MVT::x86mmx: return "MVT::x86mmx"; case MVT::x86amx: return "MVT::x86amx"; + case MVT::aarch64svcount: return "MVT::aarch64svcount"; case MVT::i64x8: return "MVT::i64x8"; case MVT::Glue: return "MVT::Glue"; case MVT::isVoid: return "MVT::isVoid"; diff --git a/llvm/utils/TableGen/IntrinsicEmitter.cpp b/llvm/utils/TableGen/IntrinsicEmitter.cpp --- a/llvm/utils/TableGen/IntrinsicEmitter.cpp +++ b/llvm/utils/TableGen/IntrinsicEmitter.cpp @@ -257,6 
+257,7 @@ IIT_ANYPTR_TO_ELT = 56, IIT_I2 = 57, IIT_I4 = 58, + IIT_AARCH64_SVCOUNT = 59, }; static void EncodeFixedValueType(MVT::SimpleValueType VT, @@ -297,6 +298,8 @@ return Sig.push_back(IIT_EXTERNREF); case MVT::funcref: return Sig.push_back(IIT_FUNCREF); + case MVT::aarch64svcount: + return Sig.push_back(IIT_AARCH64_SVCOUNT); } // clang-format on }