diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -306,6 +306,8 @@ bool isTupleCreate() const { return Flags & IsTupleCreate; } bool isTupleGet() const { return Flags & IsTupleGet; } bool isTupleSet() const { return Flags & IsTupleSet; } + bool isRead() const { return Flags & IsRead; } + bool isWrite() const { return Flags & IsWrite; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -72,3 +72,45 @@ defm SVST1_ZA32 : ZAStore<"za32", "i", "aarch64_sme_st1w", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; defm SVST1_ZA64 : ZAStore<"za64", "l", "aarch64_sme_st1d", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; defm SVST1_ZA128 : ZAStore<"za128", "q", "aarch64_sme_st1q", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; + +//////////////////////////////////////////////////////////////////////////////// +// Read horizontal/vertical ZA slices + +multiclass ZARead ch> { + let TargetGuard = "sme" in { + def NAME # _H : SInst<"svread_hor_" # n_suffix # "[_{d}]", "ddPimi", t, MergeOp1, + "aarch64_sme_read" # !cond(!eq(n_suffix, "za128") : "q", true: "") # "_horiz", + [IsRead, IsStreaming, IsSharedZA, IsPreservesZA], ch>; + + def NAME # _V : SInst<"svread_ver_" # n_suffix # "[_{d}]", "ddPimi", t, MergeOp1, + "aarch64_sme_read" # !cond(!eq(n_suffix, "za128") : "q", true: "") # "_vert", + [IsRead, IsStreaming, IsSharedZA, IsPreservesZA], ch>; + } +} + +defm SVREAD_ZA8 : ZARead<"za8", "cUc", [ImmCheck<2, ImmCheck0_0>, ImmCheck<4, ImmCheck0_15>]>; +defm SVREAD_ZA16 : ZARead<"za16", "sUshb", [ImmCheck<2, ImmCheck0_1>, ImmCheck<4, ImmCheck0_7>]>; +defm SVREAD_ZA32 : ZARead<"za32", "iUif", [ImmCheck<2, ImmCheck0_3>, ImmCheck<4, ImmCheck0_3>]>; +defm SVREAD_ZA64 : ZARead<"za64", "lUld", [ImmCheck<2, ImmCheck0_7>, ImmCheck<4, ImmCheck0_1>]>; +defm SVREAD_ZA128 : ZARead<"za128", "csilUcUsUiUlhbfd", [ImmCheck<2, ImmCheck0_15>, ImmCheck<4, ImmCheck0_0>]>; + +//////////////////////////////////////////////////////////////////////////////// +// Write horizontal/vertical ZA slices + +multiclass ZAWrite ch> { + let TargetGuard = "sme" in { + def NAME # _H : SInst<"svwrite_hor_" # n_suffix # "[_{d}]", "vimiPd", t, MergeOp1, + "aarch64_sme_write" # !cond(!eq(n_suffix, "za128") : "q", true: "") # "_horiz", + [IsWrite, IsStreaming, IsSharedZA], ch>; + + def NAME # _V : SInst<"svwrite_ver_" # n_suffix # "[_{d}]", "vimiPd", t, MergeOp1, + "aarch64_sme_write" # !cond(!eq(n_suffix, "za128") : "q", true: "") # "_vert", + [IsWrite, IsStreaming, IsSharedZA], ch>; + } +} + +defm SVWRITE_ZA8 : ZAWrite<"za8", "cUc", [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_15>]>; +defm SVWRITE_ZA16 : ZAWrite<"za16", "sUshb", [ImmCheck<0, ImmCheck0_1>, ImmCheck<2, ImmCheck0_7>]>; +defm SVWRITE_ZA32 : ZAWrite<"za32", "iUif", [ImmCheck<0, ImmCheck0_3>, ImmCheck<2, ImmCheck0_3>]>; +defm SVWRITE_ZA64 : ZAWrite<"za64", "lUld", [ImmCheck<0, ImmCheck0_7>, ImmCheck<2, ImmCheck0_1>]>; +defm SVWRITE_ZA128 : ZAWrite<"za128", "csilUcUsUiUlhbfd", [ImmCheck<0, ImmCheck0_15>, ImmCheck<2, ImmCheck0_0>]>; diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -215,6 +215,8 @@ def IsStreamingCompatible : FlagType<0x2000000000>; def IsSharedZA : FlagType<0x4000000000>; def IsPreservesZA : FlagType<0x8000000000>; +def IsRead : FlagType<0x10000000000>; +def IsWrite : FlagType<0x20000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9371,6 +9371,23 @@ return Builder.CreateCall(F, NewOps); } +Value *CodeGenFunction::EmitSMEReadWrite(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + auto *VecTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(IntID, VecTy); + if (TypeFlags.isRead()) { + Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); + Ops[3] = EmitTileslice(Ops[4], Ops[3]); + Ops.erase(&Ops[4]); + } else if (TypeFlags.isWrite()) { + Ops[1] = EmitTileslice(Ops[2], Ops[1]); + Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy); + Ops.erase(&Ops[3]); + } + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9826,6 +9843,8 @@ SVETypeFlags TypeFlags(Builtin->TypeModifier); if (TypeFlags.isLoad() || TypeFlags.isStore()) return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isRead() || TypeFlags.isWrite()) + return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); /// Should not happen return nullptr; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4252,6 +4252,9 @@ llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMEReadWrite(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -0,0 +1,1078 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-C-LABEL: @test_svread_hor_za8_s8( +// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_s8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_s16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_s16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_s32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_s32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_s64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_s64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_u8( +// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_u8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_u16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_u16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_u32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_u32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_u64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_u64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_f16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_f16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_bf16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_f32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_f32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_f64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_f64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s8( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u8( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_bf16( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svread_hor_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_s8( +// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_s8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_s16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_s16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_s32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_s32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_s64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_s64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_u8( +// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_u8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_u16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_u16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_u32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_u32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_u64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_u64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_f16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_f16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_bf16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_f32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_f32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_f64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_f64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s8( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u8( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_bf16( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svread_ver_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c @@ -0,0 +1,1078 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-C-LABEL: @test_svwrite_hor_za8_s8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_s8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_s16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_s32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_s64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_u8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_u8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_u16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_u32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_u64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_f16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_f32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_f64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z29test_svwrite_hor_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_s8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_s8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_s16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_s32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_s64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_u8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_u8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_u16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_u32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_u64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_f16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_f32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_f64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z29test_svwrite_ver_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} diff --git a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp --- a/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp +++ b/clang/test/Sema/aarch64-sme-intrinsics/acle_sme_imm.cpp @@ -30,6 +30,23 @@ SVE_ACLE_FUNC(svld1_hor_vnum_za128,,,)(0, -1, -1, pg, ptr, 1); // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} SVE_ACLE_FUNC(svst1_ver_vnum_za128,,,)(15, -1, 1, pg, ptr, 1); + + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, -1, -1, 0); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 1, -1, 15); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, 0, -1, -1); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 15, -1, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(-1, -1, 0, pg, svundef_s8()); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(1, -1, 15, pg, svundef_s8()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(0, -1, -1, pg, svundef_s8()); + // expected-error@+1 {{argument value 1 is outside the valid range [0, 0]}} + SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(15, -1, 1, pg, svundef_s8()); } __attribute__((arm_streaming)) @@ -50,6 +67,23 @@ SVE_ACLE_FUNC(svld1_hor_vnum_za64,,,)(0, -1, -1, pg, ptr, 1); // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(7, -1, 2, pg, ptr, 1); + + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, -1, -1, 0); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 2, -1, 7); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, 0, -1, -1); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 7, -1, 2); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(-1, -1, 0, pg, svundef_s16()); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(2, -1, 7, pg, svundef_s16()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(0, -1, -1, pg, svundef_s64()); + // expected-error@+1 {{argument value 2 is outside the valid range [0, 1]}} + SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(7, -1, 2, pg, svundef_s64()); } __attribute__((arm_streaming)) @@ -70,6 +104,23 @@ SVE_ACLE_FUNC(svld1_hor_vnum_za32,,,)(0, -1, -1, pg, ptr, 1); // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} SVE_ACLE_FUNC(svst1_ver_vnum_za32,,,)(3, -1, 4, pg, ptr, 1); + + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, -1, -1, 0); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 4, -1, 3); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svread_hor_za32, _s32, _m,)(svundef_s32(), pg, 0, -1, -1); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svread_ver_za32, _s32, _m,)(svundef_s32(), pg, 3, -1, 4); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(-1, -1, 0, pg, svundef_s32()); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(4, -1, 3, pg, svundef_s32()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, -1, -1, pg, svundef_s32()); + // expected-error@+1 {{argument value 4 is outside the valid range [0, 3]}} + SVE_ACLE_FUNC(svwrite_ver_za32, _s32, _m,)(3, -1, 4, pg, svundef_s32()); } __attribute__((arm_streaming)) @@ -90,6 +141,23 @@ SVE_ACLE_FUNC(svld1_hor_vnum_za16,,,)(0, -1, -1, pg, ptr, 1); // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} SVE_ACLE_FUNC(svst1_ver_vnum_za16,,,)(1, -1, 8, pg, ptr, 1); + + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svread_hor_za64, _s64, _m,)(svundef_s64(), pg, -1, -1, 0); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svread_ver_za64, _s64, _m,)(svundef_s64(), pg, 8, -1, 1); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svread_hor_za16, _s16, _m,)(svundef_s16(), pg, 0, -1, -1); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, 1, -1, 8); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svwrite_hor_za64, _s64, _m,)(-1, -1, 0, pg, svundef_s64()); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(8, -1, 1, pg, svundef_s64()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svwrite_hor_za16, _s16, _m,)(0, -1, -1, pg, svundef_s16()); + // expected-error@+1 {{argument value 8 is outside the valid range [0, 7]}} + SVE_ACLE_FUNC(svwrite_ver_za16, _s16, _m,)(1, -1, 8, pg, svundef_s16()); } __attribute__((arm_streaming)) @@ -110,6 +178,23 @@ SVE_ACLE_FUNC(svld1_hor_vnum_za8,,,)(0, -1, -1, pg, ptr, 1); // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} SVE_ACLE_FUNC(svst1_ver_vnum_za8,,,)(0, -1, 16, pg, ptr, 1); + + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svread_hor_za128, _s8, _m,)(svundef_s8(), pg, -1, -1, 0); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svread_ver_za128, _s8, _m,)(svundef_s8(), pg, 16, -1, 0); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, -1); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svread_ver_za8, _s8, _m,)(svundef_s8(), pg, 0, -1, 16); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svwrite_hor_za128, _s8, _m,)(-1, -1, 0, pg, svundef_s8()); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svwrite_ver_za128, _s8, _m,)(16, -1, 0, pg, svundef_s8()); + // expected-error@+1 {{argument value 18446744073709551615 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svwrite_hor_za8, _s8, _m,)(0, -1, -1, pg, svundef_s8()); + // expected-error@+1 {{argument value 16 is outside the valid range [0, 15]}} + SVE_ACLE_FUNC(svwrite_ver_za8, _s8, _m,)(0, -1, 16, pg, svundef_s8()); } __attribute__((arm_streaming)) @@ -122,4 +207,9 @@ SVE_ACLE_FUNC(svld1_ver_vnum_za16,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svld1_ver_vnum_za16' must be a constant integer}} SVE_ACLE_FUNC(svst1_hor_vnum_za32,,,)(u64, u64, 0, pg, ptr, u64); // expected-error {{argument to 'svst1_hor_vnum_za32' must be a constant integer}} SVE_ACLE_FUNC(svst1_ver_vnum_za64,,,)(0, u64, u64, pg, ptr, u64); // expected-error {{argument to 'svst1_ver_vnum_za64' must be a constant integer}} + + SVE_ACLE_FUNC(svread_hor_za8, _s8, _m,)(svundef_s8(), pg, 0, u64, u64); // expected-error-re {{argument to 'svread_hor_za8{{.*}}_m' must be a constant integer}} + SVE_ACLE_FUNC(svread_ver_za16, _s16, _m,)(svundef_s16(), pg, u64, u64, 0); // expected-error-re {{argument to 'svread_ver_za16{{.*}}_m' must be a constant integer}} + SVE_ACLE_FUNC(svwrite_hor_za32, _s32, _m,)(0, u64, u64, pg, svundef_s32()); // expected-error-re {{argument to 'svwrite_hor_za32{{.*}}_m' must be a constant integer}} + SVE_ACLE_FUNC(svwrite_ver_za64, _s64, _m,)(u64, u64, 0, pg, svundef_s64()); // expected-error-re {{argument to 'svwrite_ver_za64{{.*}}_m' must be a constant integer}} } diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1438,6 +1438,8 @@ OS << "/* Function attributes */\n"; OS << "#define __ai static __inline__ __attribute__((__always_inline__, " "__nodebug__))\n\n"; + OS << "#define __aio static __inline__ __attribute__((__always_inline__, " + "__nodebug__, __overloadable__))\n\n"; OS << "#ifdef __cplusplus\n"; OS << "extern \"C\" {\n";