diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -364,6 +364,7 @@ bool isLoad() const { return Flags & IsLoad; } bool isStore() const { return Flags & IsStore; } + bool isMove() const { return Flags & IsMove; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -64,3 +64,31 @@ def SVST1_VER_VNUM_ZA32 : MInst<"svst1_ver_vnum_za32", "vimiP%l", "i", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1w_vert">; def SVST1_VER_VNUM_ZA64 : MInst<"svst1_ver_vnum_za64", "vimiP%l", "l", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1d_vert">; def SVST1_VER_VNUM_ZA128 : MInst<"svst1_ver_vnum_za128", "vimiP%l", "q", [IsStore, IsOverloadNone, IsStreaming, IsSharedZA, IsPreservesZA], MemEltTyDefault, "aarch64_sme_st1q_vert">; + +//////////////////////////////////////////////////////////////////////////////// +// SME - Read horizontal/vertical ZA slices + +def SVREAD_HOR_ZA8 : SInst<"svread_hor_za8[_{d}]", "ddPimi", "cUc", MergeOp1, "aarch64_sme_read_horiz", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_HOR_ZA16 : SInst<"svread_hor_za16[_{d}]", "ddPimi", "sUshb", MergeOp1, "aarch64_sme_read_horiz", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_HOR_ZA32 : SInst<"svread_hor_za32[_{d}]", "ddPimi", "iUif", MergeOp1, "aarch64_sme_read_horiz", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_HOR_ZA64 : SInst<"svread_hor_za64[_{d}]", "ddPimi", "lUld", MergeOp1, "aarch64_sme_read_horiz", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_HOR_ZA128 : SInst<"svread_hor_za128[_{d}]", "ddPimi", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_readq_horiz", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_VER_ZA8 : SInst<"svread_ver_za8[_{d}]", "ddPimi", "cUc", MergeOp1, "aarch64_sme_read_vert", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_VER_ZA16 : SInst<"svread_ver_za16[_{d}]", "ddPimi", "sUshb", MergeOp1, "aarch64_sme_read_vert", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_VER_ZA32 : SInst<"svread_ver_za32[_{d}]", "ddPimi", "iUif", MergeOp1, "aarch64_sme_read_vert", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_VER_ZA64 : SInst<"svread_ver_za64[_{d}]", "ddPimi", "lUld", MergeOp1, "aarch64_sme_read_vert", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; +def SVREAD_VER_ZA128 : SInst<"svread_ver_za128[_{d}]", "ddPimi", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_readq_vert", [IsMove, IsStreaming, IsSharedZA, IsPreservesZA]>; + +//////////////////////////////////////////////////////////////////////////////// +// SME - Write horizontal/vertical ZA slices + +def SVWRITE_HOR_ZA8 : SInst<"svwrite_hor_za8[_{d}]", "vimiPd", "cUc", MergeOp1, "aarch64_sme_write_horiz", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_HOR_ZA16 : SInst<"svwrite_hor_za16[_{d}]", "vimiPd", "sUshb", MergeOp1, "aarch64_sme_write_horiz", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_HOR_ZA32 : SInst<"svwrite_hor_za32[_{d}]", "vimiPd", "iUif", MergeOp1, "aarch64_sme_write_horiz", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_HOR_ZA64 : SInst<"svwrite_hor_za64[_{d}]", "vimiPd", "lUld", MergeOp1, "aarch64_sme_write_horiz", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_HOR_ZA128 : SInst<"svwrite_hor_za128[_{d}]", "vimiPd", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_writeq_horiz", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_VER_ZA8 : SInst<"svwrite_ver_za8[_{d}]", "vimiPd", "cUc", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_VER_ZA16 : SInst<"svwrite_ver_za16[_{d}]", "vimiPd", "sUshb", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_VER_ZA32 : SInst<"svwrite_ver_za32[_{d}]", "vimiPd", "iUif", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_VER_ZA64 : SInst<"svwrite_ver_za64[_{d}]", "vimiPd", "lUld", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; +def SVWRITE_VER_ZA128 : SInst<"svwrite_ver_za128[_{d}]", "vimiPd", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_writeq_vert", [IsMove, IsStreaming, IsSharedZA]>; diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -215,6 +215,7 @@ def IsStreamingCompatible : FlagType<0x1000000000>; def IsSharedZA : FlagType<0x2000000000>; def IsPreservesZA : FlagType<0x4000000000>; +def IsMove : FlagType<0x8000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9309,6 +9309,46 @@ return Builder.CreateAdd(Base, CastOffset, "tileslice"); } +// Return the llvm vector type corresponding to the specified element TypeFlags. +llvm::ScalableVectorType * +CodeGenFunction::getSVEType(const SMETypeFlags &TypeFlags) { + switch (TypeFlags.getEltType()) { + default: + llvm_unreachable("Invalid SMETypeFlag!"); + + case SMETypeFlags::EltTyInt8: + return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16); + case SMETypeFlags::EltTyInt16: + return llvm::ScalableVectorType::get(Builder.getInt16Ty(), 8); + case SMETypeFlags::EltTyInt32: + return llvm::ScalableVectorType::get(Builder.getInt32Ty(), 4); + case SMETypeFlags::EltTyInt64: + return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2); + case SMETypeFlags::EltTyInt128: + return llvm::ScalableVectorType::get(Builder.getInt128Ty(), 1); + + case SMETypeFlags::EltTyFloat16: + return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); + case SMETypeFlags::EltTyBFloat16: + return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); + case SMETypeFlags::EltTyFloat32: + return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); + case SMETypeFlags::EltTyFloat64: + return llvm::ScalableVectorType::get(Builder.getDoubleTy(), 2); + + case SMETypeFlags::EltTyBool8: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 16); + case SMETypeFlags::EltTyBool16: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8); + case SMETypeFlags::EltTyBool32: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4); + case SMETypeFlags::EltTyBool64: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2); + case SMETypeFlags::EltTyBool128: + return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 1); + } +} + Value *CodeGenFunction::EmitSMELd1St1(SMETypeFlags TypeFlags, SmallVectorImpl &Ops, unsigned IntID) { @@ -9369,6 +9409,35 @@ return Builder.CreateCall(F, NewOps); } +Value *CodeGenFunction::EmitSMEReadWrite(SMETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + auto *VecTy = getSVEType(TypeFlags); + Function *F = CGM.getIntrinsic(IntID, VecTy); + switch (IntID) { + case Intrinsic::aarch64_sme_read_horiz: + case Intrinsic::aarch64_sme_readq_horiz: + case Intrinsic::aarch64_sme_read_vert: + case Intrinsic::aarch64_sme_readq_vert: { + Ops[1] = EmitSVEPredicateCast(Ops[1], VecTy); + Ops[3] = EmitTileslice(Ops[4], Ops[3]); + Ops.erase(&Ops[4]); + } break; + case Intrinsic::aarch64_sme_write_horiz: + case Intrinsic::aarch64_sme_writeq_horiz: + case Intrinsic::aarch64_sme_write_vert: + case Intrinsic::aarch64_sme_writeq_vert: { + Ops[1] = EmitTileslice(Ops[2], Ops[1]); + Ops[2] = EmitSVEPredicateCast(Ops[3], VecTy); + Ops.erase(&Ops[3]); + } break; + default: + llvm_unreachable("unexpected SME builtin ID"); + break; + } + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9823,6 +9892,8 @@ SMETypeFlags TypeFlags(Builtin->TypeModifier); if (TypeFlags.isLoad() || TypeFlags.isStore()) return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isMove()) + return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); /// Should not happen return nullptr; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4249,9 +4249,13 @@ unsigned IntID); llvm::Value *EmitAArch64SVEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::ScalableVectorType *getSVEType(const SMETypeFlags &TypeFlags); llvm::Value *EmitSMELd1St1(SMETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMEReadWrite(SMETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -0,0 +1,1079 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-C-LABEL: @test_svread_hor_za8_s8( +// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_s8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_s16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_s16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_s32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_s32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_s64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_s64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_u8( +// CHECK-CXX-LABEL: @_Z22test_svread_hor_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za8_u8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_u16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_u16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_u32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_u32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_u64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_u64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_f16( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_f16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_bf16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_f32( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za32_f32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_f64( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za64_f64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s8( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u8( +// CHECK-CXX-LABEL: @_Z24test_svread_hor_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f16( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_bf16( +// CHECK-CXX-LABEL: @_Z26test_svread_hor_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svread_hor_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f32( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f64( +// CHECK-CXX-LABEL: @_Z25test_svread_hor_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_hor_za128_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_hor_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_s8( +// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_s8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_s16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_s16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_s32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_s32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_s64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_s64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_u8( +// CHECK-CXX-LABEL: @_Z22test_svread_ver_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za8_u8_1( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_u16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_u16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_u32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_u32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_u64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_u64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_f16( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_f16_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_bf16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_f32( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za32_f32_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_f64( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za64_f64_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s8( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u8( +// CHECK-CXX-LABEL: @_Z24test_svread_ver_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u8_1( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f16( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_bf16( +// CHECK-CXX-LABEL: @_Z26test_svread_ver_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svread_ver_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f32( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f64( +// CHECK-CXX-LABEL: @_Z25test_svread_ver_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-C-LABEL: @test_svread_ver_za128_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svread_ver_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i32 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c @@ -0,0 +1,1079 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-C-LABEL: @test_svwrite_hor_za8_s8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_s8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_s16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_s32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_s64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_u8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_hor_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za8_u8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_u16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_u32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_u64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_f16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_f32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za32_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_f64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za64_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_s64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_hor_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_u64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16( +// CHECK-CXX-LABEL: @_Z27test_svwrite_hor_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z29test_svwrite_hor_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_hor_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_hor_za128_f64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_hor_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_s8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_s8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_s16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_s16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_s32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_s32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_s64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_s64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_u8( +// CHECK-CXX-LABEL: @_Z23test_svwrite_ver_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za8_u8_1( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i32 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_u16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_u16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_u32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_u32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_u64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_u64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_f16( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_f16_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za16_bf16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i32 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_f32( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za32_f32_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i32 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_f64( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za64_f64_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i32 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_s64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u8( +// CHECK-CXX-LABEL: @_Z25test_svwrite_ver_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u8_1( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i32 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_u64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f16( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f16_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16( +// CHECK-CXX-LABEL: @_Z27test_svwrite_ver_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_bf16_1( +// CHECK-CXX-LABEL: @_Z29test_svwrite_ver_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f32( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f32_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f64( +// CHECK-CXX-LABEL: @_Z26test_svwrite_ver_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-C-LABEL: @test_svwrite_ver_za128_f64_1( +// CHECK-CXX-LABEL: @_Z28test_svwrite_ver_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i32 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -1483,6 +1483,8 @@ OS << "/* Function attributes */\n"; OS << "#define __ai static __inline__ __attribute__((__always_inline__, " "__nodebug__))\n\n"; + OS << "#define __aio static __inline__ __attribute__((__always_inline__, " + "__nodebug__, __overloadable__))\n\n"; OS << "#ifdef __cplusplus\n"; OS << "extern \"C\" {\n";