diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -284,6 +284,8 @@ bool isSME() const { return Flags & IsSME; } bool isSMELd1() const { return Flags & IsSMELd1; } bool isSMESt1() const { return Flags & IsSMESt1; } + bool isSMERead() const { return Flags & IsSMERead; } + bool isSMEWrite() const { return Flags & IsSMEWrite; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -209,6 +209,8 @@ def IsSME : FlagType<0x800000000>; def IsSMELd1 : FlagType<0x1000000000>; def IsSMESt1 : FlagType<0x2000000000>; +def IsSMERead : FlagType<0x4000000000>; +def IsSMEWrite : FlagType<0x8000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -2129,3 +2131,31 @@ def SVBGRP : SInst<"svbgrp[_{d}]", "ddd", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; def SVBGRP_N : SInst<"svbgrp[_n_{d}]", "dda", "UcUsUiUl", MergeNone, "aarch64_sve_bgrp_x">; } + +//////////////////////////////////////////////////////////////////////////////// +// SME - Read horizontal/vertical ZA slices + +def SVREAD_HOR_ZA8 : SInst<"svread_hor_za8[_{d}]", "ddPimi", "cUc", MergeOp1, "aarch64_sme_read_horiz", [IsSME, IsSMERead]>; +def SVREAD_HOR_ZA16 : SInst<"svread_hor_za16[_{d}]", "ddPimi", "sUshb", MergeOp1, "aarch64_sme_read_horiz", [IsSME, IsSMERead]>; +def SVREAD_HOR_ZA32 : SInst<"svread_hor_za32[_{d}]", "ddPimi", "iUif", MergeOp1, "aarch64_sme_read_horiz", [IsSME, IsSMERead]>; +def SVREAD_HOR_ZA64 : SInst<"svread_hor_za64[_{d}]", "ddPimi", "lUld", MergeOp1, "aarch64_sme_read_horiz", [IsSME, IsSMERead]>; +def SVREAD_HOR_ZA128 : SInst<"svread_hor_za128[_{d}]", "ddPimi", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_readq_horiz", [IsSME, IsSMERead]>; +def SVREAD_VER_ZA8 : SInst<"svread_ver_za8[_{d}]", "ddPimi", "cUc", MergeOp1, "aarch64_sme_read_vert", [IsSME, IsSMERead]>; +def SVREAD_VER_ZA16 : SInst<"svread_ver_za16[_{d}]", "ddPimi", "sUshb", MergeOp1, "aarch64_sme_read_vert", [IsSME, IsSMERead]>; +def SVREAD_VER_ZA32 : SInst<"svread_ver_za32[_{d}]", "ddPimi", "iUif", MergeOp1, "aarch64_sme_read_vert", [IsSME, IsSMERead]>; +def SVREAD_VER_ZA64 : SInst<"svread_ver_za64[_{d}]", "ddPimi", "lUld", MergeOp1, "aarch64_sme_read_vert", [IsSME, IsSMERead]>; +def SVREAD_VER_ZA128 : SInst<"svread_ver_za128[_{d}]", "ddPimi", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_readq_vert", [IsSME, IsSMERead]>; + +//////////////////////////////////////////////////////////////////////////////// +// SME - Write horizontal/vertical ZA slices + +def SVWRITE_HOR_ZA8 : SInst<"svwrite_hor_za8[_{d}]", "vimiPd", "cUc", MergeOp1, "aarch64_sme_write_horiz", [IsSME, IsSMEWrite]>; +def SVWRITE_HOR_ZA16 : SInst<"svwrite_hor_za16[_{d}]", "vimiPd", "sUshb", MergeOp1, "aarch64_sme_write_horiz", [IsSME, IsSMEWrite]>; +def SVWRITE_HOR_ZA32 : SInst<"svwrite_hor_za32[_{d}]", "vimiPd", "iUif", MergeOp1, "aarch64_sme_write_horiz", [IsSME, IsSMEWrite]>; +def SVWRITE_HOR_ZA64 : SInst<"svwrite_hor_za64[_{d}]", "vimiPd", "lUld", MergeOp1, "aarch64_sme_write_horiz", [IsSME, IsSMEWrite]>; +def SVWRITE_HOR_ZA128 : SInst<"svwrite_hor_za128[_{d}]", "vimiPd", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_writeq_horiz", [IsSME, IsSMEWrite]>; +def SVWRITE_VER_ZA8 : SInst<"svwrite_ver_za8[_{d}]", "vimiPd", "cUc", MergeOp1, "aarch64_sme_write_vert", [IsSME, IsSMEWrite]>; +def SVWRITE_VER_ZA16 : SInst<"svwrite_ver_za16[_{d}]", "vimiPd", "sUshb", MergeOp1, "aarch64_sme_write_vert", [IsSME, IsSMEWrite]>; +def SVWRITE_VER_ZA32 : SInst<"svwrite_ver_za32[_{d}]", "vimiPd", "iUif", MergeOp1, "aarch64_sme_write_vert", [IsSME, IsSMEWrite]>; +def SVWRITE_VER_ZA64 : SInst<"svwrite_ver_za64[_{d}]", "vimiPd", "lUld", MergeOp1, "aarch64_sme_write_vert", [IsSME, IsSMEWrite]>; +def SVWRITE_VER_ZA128 : SInst<"svwrite_ver_za128[_{d}]", "vimiPd", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_writeq_vert", [IsSME, IsSMEWrite]>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9097,6 +9097,31 @@ return Builder.CreateCall(F, NewOps); } +Value *CodeGenFunction::EmitSMERead(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + auto *ResultTy = getSVEType(TypeFlags); + Ops[1] = EmitSVEPredicateCast(Ops[1], ResultTy); + Ops[2] = Builder.CreateIntCast(Ops[2], Int64Ty, false); + Ops[3] = EmitTileslice(Ops[4], Ops[3]); + // Erase the slice_offset operand. + Ops.erase(&Ops[4]); + Function *F = CGM.getIntrinsic(IntID, ResultTy); + return Builder.CreateCall(F, Ops); +} + +Value *CodeGenFunction::EmitSMEWrite(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + Ops[0] = Builder.CreateIntCast(Ops[0], Int64Ty, false); + Ops[1] = EmitTileslice(Ops[2], Ops[1]); + Ops[2] = EmitSVEPredicateCast(Ops[3], getSVEType(TypeFlags)); + // Erase the slice_offset operand. + Ops.erase(&Ops[3]); + Function *F = CGM.getIntrinsic(IntID, {getSVEType(TypeFlags)}); + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9229,6 +9254,10 @@ return EmitSVEMaskedStore(E, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isSMELd1() || TypeFlags.isSMESt1()) return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isSMERead()) + return EmitSMERead(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isSMEWrite()) + return EmitSMEWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherLoad()) return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isScatterStore()) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4234,6 +4234,12 @@ llvm::Value *EmitSMELd1St1(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMERead(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); + llvm::Value *EmitSMEWrite(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_read.c @@ -0,0 +1,1568 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + + +// CHECK-LABEL: @test_svread_hor_za8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svread_hor_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za8_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-LABEL: @test_svread_hor_za16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za16_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_hor_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_hor_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_hor_za8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svread_hor_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za8_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-LABEL: @test_svread_hor_za16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za16_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_hor_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_hor_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_hor_za16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za16_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_hor_za16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za16_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_hor_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za32_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_hor_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za64_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_hor_za128_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_hor_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_hor_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_hor_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_hor_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_hor_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_hor_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_hor_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_hor_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_hor_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_hor_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_hor_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_hor_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_hor_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_hor_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_hor_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_hor_za128_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_hor_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.horiz.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_hor_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_hor_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svread_ver_za8_s8u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za8_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za8_s8_1u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za8_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _s8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-LABEL: @test_svread_ver_za16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za16_s16u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za16_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_s16_1u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za16_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _s16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_ver_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za32_s32u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_s32_1u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za32_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _s32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_ver_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za64_s64u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_s64_1u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za64_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _s64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_ver_za8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z22test_svread_ver_za8_u8u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za8_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za8_u8_1u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za8_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za8, _u8, _m)(zd, pg, 0, slice_base, 15); +} + +// CHECK-LABEL: @test_svread_ver_za16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za16_u16u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za16_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_u16_1u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za16_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _u16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_ver_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za32_u32u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_u32_1u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za32_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _u32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_ver_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za64_u64u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_u64_1u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za64_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _u64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_ver_za16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za16_f16u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za16_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za16_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za16_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _f16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_ver_za16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za16_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za16_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za16_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 1, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za16_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za16, _bf16, _m)(zd, pg, 1, slice_base, 7); +} + +// CHECK-LABEL: @test_svread_ver_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za32_f32u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za32_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za32_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 3, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za32_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za32, _f32, _m)(zd, pg, 3, slice_base, 3); +} + +// CHECK-LABEL: @test_svread_ver_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za64_f64u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za64_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za64_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.read.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 7, i32 [[TILESLICE]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za64_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za64, _f64, _m)(zd, pg, 7, slice_base, 1); +} + +// CHECK-LABEL: @test_svread_ver_za128_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za128_s8u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za128_s8_1u10__SVInt8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svint8_t test_svread_ver_za128_s8_1(svint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_s16u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s16_1u11__SVInt16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint16_t test_svread_ver_za128_s16_1(svint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_s32u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s32_1u11__SVInt32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint32_t test_svread_ver_za128_s32_1(svint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_s64u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_s64_1u11__SVInt64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svint64_t test_svread_ver_za128_s64_1(svint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _s64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z24test_svread_ver_za128_u8u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP0]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za128_u8_1u11__SVUint8_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv16i8( [[ZD:%.*]], [[PG:%.*]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP0]] +// +__attribute__((arm_streaming)) svuint8_t test_svread_ver_za128_u8_1(svuint8_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u8, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_u16u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u16_1u12__SVUint16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8i16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint16_t test_svread_ver_za128_u16_1(svuint16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_u32u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u32_1u12__SVUint32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4i32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint32_t test_svread_ver_za128_u32_1(svuint32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_u64u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_u64_1u12__SVUint64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2i64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svuint64_t test_svread_ver_za128_u64_1(svuint64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _u64, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_f16u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f16_1u13__SVFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8f16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat16_t test_svread_ver_za128_f16_1(svfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z26test_svread_ver_za128_bf16u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z28test_svread_ver_za128_bf16_1u14__SVBFloat16_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv8bf16( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svbfloat16_t test_svread_ver_za128_bf16_1(svbfloat16_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _bf16, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_f32u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f32_1u13__SVFloat32_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv4f32( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat32_t test_svread_ver_za128_f32_1(svfloat32_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f32, _m)(zd, pg, 15, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z25test_svread_ver_za128_f64u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 0, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 0, slice_base, 0); +} + +// CHECK-LABEL: @test_svread_ver_za128_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CHECK-NEXT: ret [[TMP1]] +// +// CPP-CHECK-LABEL: @_Z27test_svread_ver_za128_f64_1u13__SVFloat64_tu10__SVBool_tj( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sme.readq.vert.nxv2f64( [[ZD:%.*]], [[TMP0]], i64 15, i32 [[SLICE_BASE:%.*]]) +// CPP-CHECK-NEXT: ret [[TMP1]] +// +__attribute__((arm_streaming)) svfloat64_t test_svread_ver_za128_f64_1(svfloat64_t zd, svbool_t pg, uint32_t slice_base) { + return SME_ACLE_FUNC(svread_ver_za128, _f64, _m)(zd, pg, 15, slice_base, 0); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_write.c @@ -0,0 +1,1568 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svwrite_hor_za8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svwrite_hor_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svwrite_hor_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za8_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8f16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za16_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv8bf16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za32_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv4f32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za64_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.horiz.nxv2f64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_hor_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8f16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_hor_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_hor_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv8bf16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv4f32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_hor_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_hor_za128_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_hor_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.horiz.nxv2f64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_hor_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_hor_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svwrite_ver_za8_s8ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za8_s8_1ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _s8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za16_s16ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_s16_1ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _s16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za32_s32ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_s32_1ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _s32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za64_s64ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_s64_1ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _s64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svwrite_ver_za8_u8ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za8_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za8_u8_1ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 15 +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv16i8(i64 0, i32 [[TILESLICE]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za8_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za8, _u8, _m)(0, slice_base, 15, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za16_u16ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_u16_1ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8i16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _u16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za32_u32ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_u32_1ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4i32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _u32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za64_u64ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_u64_1ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2i64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _u64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za16_f16ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za16_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8f16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _f16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za16_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za16_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za16_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 7 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv8bf16(i64 1, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za16_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za16, _bf16, _m)(1, slice_base, 7, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za32_f32ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za32_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za32_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 3 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv4f32(i64 3, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za32_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za32, _f32, _m)(3, slice_base, 3, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za64_f64ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za64_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za64_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TILESLICE:%.*]] = add i32 [[SLICE_BASE:%.*]], 1 +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.write.vert.nxv2f64(i64 7, i32 [[TILESLICE]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za64_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za64, _f64, _m)(7, slice_base, 1, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za128_s8ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za128_s8_1ju10__SVBool_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s8_1(uint32_t slice_base, svbool_t pg, svint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_s16ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s16_1ju10__SVBool_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s16_1(uint32_t slice_base, svbool_t pg, svint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_s32ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s32_1ju10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s32_1(uint32_t slice_base, svbool_t pg, svint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_s64ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_s64_1ju10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_s64_1(uint32_t slice_base, svbool_t pg, svint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _s64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z25test_svwrite_ver_za128_u8ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 0, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u8_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za128_u8_1ju10__SVBool_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv16i8(i64 15, i32 [[SLICE_BASE:%.*]], [[PG:%.*]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u8_1(uint32_t slice_base, svbool_t pg, svuint8_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u8, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_u16ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u16_1ju10__SVBool_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8i16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u16_1(uint32_t slice_base, svbool_t pg, svuint16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_u32ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u32_1ju10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4i32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u32_1(uint32_t slice_base, svbool_t pg, svuint32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_u64ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_u64_1ju10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2i64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_u64_1(uint32_t slice_base, svbool_t pg, svuint64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _u64, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_f16ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f16_1ju10__SVBool_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8f16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f16_1(uint32_t slice_base, svbool_t pg, svfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z27test_svwrite_ver_za128_bf16ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_bf16_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z29test_svwrite_ver_za128_bf16_1ju10__SVBool_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv8bf16(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_bf16_1(uint32_t slice_base, svbool_t pg, svbfloat16_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _bf16, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_f32ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f32_1ju10__SVBool_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv4f32(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f32_1(uint32_t slice_base, svbool_t pg, svfloat32_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f32, _m)(15, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z26test_svwrite_ver_za128_f64ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 0, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(0, slice_base, 0, pg, zn); +} + +// CHECK-LABEL: @test_svwrite_ver_za128_f64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z28test_svwrite_ver_za128_f64_1ju10__SVBool_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PG:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.writeq.vert.nxv2f64(i64 15, i32 [[SLICE_BASE:%.*]], [[TMP0]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svwrite_ver_za128_f64_1(uint32_t slice_base, svbool_t pg, svfloat64_t zn) { + SME_ACLE_FUNC(svwrite_ver_za128, _f64, _m)(15, slice_base, 0, pg, zn); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -867,6 +867,10 @@ this->SMEAttributes = "arm_streaming, arm_shared_za"; else if (this->Flags & Emitter.getEnumValueForFlag("IsSMESt1")) this->SMEAttributes = "arm_streaming, arm_shared_za, arm_preserves_za"; + else if (this->Flags & Emitter.getEnumValueForFlag("IsSMERead")) + this->SMEAttributes = "arm_streaming, arm_shared_za, arm_preserves_za"; + else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEWrite")) + this->SMEAttributes = "arm_streaming, arm_shared_za"; else llvm_unreachable("Unknown SME instruction"); } else @@ -1351,6 +1355,8 @@ OS << "/* Function attributes */\n"; OS << "#define __ai static __inline__ __attribute__((__always_inline__, " "__nodebug__))\n\n"; + OS << "#define __aio static __inline__ __attribute__((__always_inline__, " + "__nodebug__, __overloadable__))\n\n"; OS << "#ifdef __cplusplus\n"; OS << "extern \"C\" {\n";