diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -289,6 +289,7 @@
     bool isSMEZero() const { return Flags & IsSMEZero; }
     bool isSMELdr() const { return Flags & IsSMELdr; }
     bool isSMEStr() const { return Flags & IsSMEStr; }
+    bool isSMEAdd() const { return Flags & IsSMEAdd; }
 
     uint64_t getBits() const { return Flags; }
     bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }
diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -215,6 +215,7 @@
 def IsSMELdr : FlagType<0x20000000000>;
 def IsSMEStr : FlagType<0x40000000000>;
 def IsSMECnt : FlagType<0x80000000000>;
+def IsSMEAdd : FlagType<0x100000000000>;
 
 // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
 class ImmCheckType<int val> {
@@ -2182,3 +2183,18 @@
 def SVCNTSH : SInst<"svcntsh", "n", "", MergeNone, "aarch64_sme_cntsh", [IsOverloadNone, IsSME, IsSMECnt]>;
 def SVCNTSW : SInst<"svcntsw", "n", "", MergeNone, "aarch64_sme_cntsw", [IsOverloadNone, IsSME, IsSMECnt]>;
 def SVCNTSD : SInst<"svcntsd", "n", "", MergeNone, "aarch64_sme_cntsd", [IsOverloadNone, IsSME, IsSMECnt]>;
+
+////////////////////////////////////////////////////////////////////////////////
+// SME - ADDHA/ADDVA
+//
+// Operand 0 selects the ZA tile and must be an in-range immediate: 0-3 for the
+// 32-bit tiles and 0-7 for the 64-bit tiles.
+
+def SVADDHA_ZA32 : SInst<"svaddha_za32[_{d}]", "viPPd", "iUi", MergeNone, "aarch64_sme_addha", [IsSME, IsSMEAdd], [ImmCheck<0, ImmCheck0_3>]>;
+let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in {
+  def SVADDHA_ZA64 : SInst<"svaddha_za64[_{d}]", "viPPd", "lUl", MergeNone, "aarch64_sme_addha", [IsSME, IsSMEAdd], [ImmCheck<0, ImmCheck0_7>]>;
+}
+def SVADDVA_ZA32 : SInst<"svaddva_za32[_{d}]", "viPPd", "iUi", MergeNone, "aarch64_sme_addva", [IsSME, IsSMEAdd], [ImmCheck<0, ImmCheck0_3>]>;
+let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in {
+  def SVADDVA_ZA64 : SInst<"svaddva_za64[_{d}]", "viPPd", "lUl", MergeNone, "aarch64_sme_addva", [IsSME, IsSMEAdd], [ImmCheck<0, ImmCheck0_7>]>;
+}
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -56,6 +56,7 @@
   bool HasMOPS;
   bool HasRCPC;
   bool HasSME;
+  bool HasSMEI64;
 
   llvm::AArch64::ArchKind ArchKind;
 
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -411,6 +411,9 @@
   if ((FPU & NeonMode) && HasFP16FML)
     Builder.defineMacro("__ARM_FEATURE_FP16_FML", "1");
 
+  if (HasSME && HasSMEI64)
+    Builder.defineMacro("__ARM_FEATURE_SME_I16I64", "1");
+
   if (Opts.hasSignReturnAddress()) {
     // Bitmask:
     // 0: Protection using the A key
@@ -557,6 +560,7 @@
   HasMOPS = false;
   HasRCPC = false;
   HasSME = false;
+  HasSMEI64 = false;
 
   ArchKind = llvm::AArch64::ArchKind::INVALID;
 
@@ -568,6 +572,12 @@
       HasBFloat16 = true;
       HasFullFP16 = true;
     }
+    if (Feature == "+sme-i64") {
+      HasSME = true;
+      HasSMEI64 = true;
+      HasBFloat16 = true;
+      HasFullFP16 = true;
+    }
     if (Feature == "+sve") {
       FPU |= SveMode;
       HasFullFP16 = true;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -9149,6 +9149,24 @@
   return Builder.CreateCall(F, Ops);
 }
 
+/// Emit a call to an SME ADDHA/ADDVA intrinsic (selected by \p IntID).
+/// Ops[0] is the immediate ZA tile number, Ops[1] and Ops[2] are the svbool_t
+/// governing predicates and Ops[3] is the source vector.
+Value *CodeGenFunction::EmitSMEAdd(SVETypeFlags TypeFlags,
+                                   SmallVectorImpl<Value *> &Ops,
+                                   unsigned IntID) {
+  auto *DefaultType = getSVEType(TypeFlags);
+  // The intrinsic takes the tile number as an i64.
+  Ops[0] = Builder.CreateIntCast(Ops[0], Int64Ty, false);
+  // Narrow both svbool_t predicates to the predicate type whose lane count
+  // matches the data vector.
+  Ops[1] = EmitSVEPredicateCast(Ops[1], DefaultType);
+  Ops[2] = EmitSVEPredicateCast(Ops[2], DefaultType);
+  // The intrinsic is overloaded on the data vector type.
+  Function *F = CGM.getIntrinsic(IntID, {DefaultType});
+  return Builder.CreateCall(F, Ops);
+}
+
 // Limit the usage of scalable llvm IR generated by the ACLE by using the
 // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat.
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9289,6 +9300,8 @@ return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isSMELdr() || TypeFlags.isSMEStr()) return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isSMEAdd()) + return EmitSMEAdd(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isGatherLoad()) return EmitSVEGatherLoad(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isScatterStore()) diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4246,6 +4246,9 @@ llvm::Value *EmitSMELdrStr(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMEAdd(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i32.c @@ -0,0 +1,159 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - 
%s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svaddha_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za32_u32u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + SME_ACLE_FUNC(svaddha_za32, _u32,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret 
void +// +// CPP-CHECK-LABEL: @_Z23test_svaddha_za32_u32_1u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { + SME_ACLE_FUNC(svaddha_za32, _u32,)(3, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za32_s32u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + SME_ACLE_FUNC(svaddha_za32, _s32,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 3, [[TMP0]], 
[[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddha_za32_s32_1u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) { + SME_ACLE_FUNC(svaddha_za32, _s32,)(3, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za32_u32u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za32_u32(svbool_t pn, svbool_t pm, svuint32_t zn) { + SME_ACLE_FUNC(svaddva_za32, _u32,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za32_u32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void 
@llvm.aarch64.sme.addva.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddva_za32_u32_1u10__SVBool_tu10__SVBool_tu12__SVUint32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za32_u32_1(svbool_t pn, svbool_t pm, svuint32_t zn) { + SME_ACLE_FUNC(svaddva_za32, _u32,)(3, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za32_s32u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za32_s32(svbool_t pn, svbool_t pm, svint32_t zn) { + SME_ACLE_FUNC(svaddva_za32, _s32,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za32_s32_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( 
[[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddva_za32_s32_1u10__SVBool_tu10__SVBool_tu11__SVInt32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv4i32(i64 3, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za32_s32_1(svbool_t pn, svbool_t pm, svint32_t zn) { + SME_ACLE_FUNC(svaddva_za32, _s32,)(3, pn, pm, zn); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_add-i64.c @@ -0,0 +1,159 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm 
-o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svaddha_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za64_u64u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + SME_ACLE_FUNC(svaddha_za64, _u64,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddha_za64_u64_1u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call 
@llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { + SME_ACLE_FUNC(svaddha_za64, _u64,)(7, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddha_za64_s64u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + SME_ACLE_FUNC(svaddha_za64, _s64,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddha_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddha_za64_s64_1u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// 
CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addha.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddha_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) { + SME_ACLE_FUNC(svaddha_za64, _s64,)(7, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za64_u64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za64_u64u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za64_u64(svbool_t pn, svbool_t pm, svuint64_t zn) { + SME_ACLE_FUNC(svaddva_za64, _u64,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za64_u64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: 
@_Z23test_svaddva_za64_u64_1u10__SVBool_tu10__SVBool_tu12__SVUint64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za64_u64_1(svbool_t pn, svbool_t pm, svuint64_t zn) { + SME_ACLE_FUNC(svaddva_za64, _u64,)(7, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za64_s64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svaddva_za64_s64u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za64_s64(svbool_t pn, svbool_t pm, svint64_t zn) { + SME_ACLE_FUNC(svaddva_za64, _s64,)(0, pn, pm, zn); +} + +// CHECK-LABEL: @test_svaddva_za64_s64_1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CHECK-NEXT: 
ret void +// +// CPP-CHECK-LABEL: @_Z23test_svaddva_za64_s64_1u10__SVBool_tu10__SVBool_tu11__SVInt64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.addva.nxv2i64(i64 7, [[TMP0]], [[TMP1]], [[ZN:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svaddva_za64_s64_1(svbool_t pn, svbool_t pm, svint64_t zn) { + SME_ACLE_FUNC(svaddva_za64, _s64,)(7, pn, pm, zn); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -879,6 +879,8 @@ this->SMEAttributes = "arm_streaming_compatible, arm_shared_za, arm_preserves_za"; else if (this->Flags & Emitter.getEnumValueForFlag("IsSMECnt")) this->SMEAttributes = "arm_streaming_compatible, arm_preserves_za"; + else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEAdd")) + this->SMEAttributes = "arm_streaming, arm_shared_za"; else llvm_unreachable("Unknown SME instruction"); } else