diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -216,6 +216,7 @@ def IsSMEStr : FlagType<0x40000000000>; def IsSMECnt : FlagType<0x80000000000>; def IsSMEAdd : FlagType<0x100000000000>; +def IsSMEMop : FlagType<0x200000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { @@ -2195,3 +2196,55 @@ let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in { def SVADDVA_ZA64 : SInst<"svaddva_za64[_{d}]", "viPPd", "lUl", MergeNone, "aarch64_sme_addva", [IsSME, IsSMEAdd]>; } + +//////////////////////////////////////////////////////////////////////////////// +// SME - BFMOPA, FMOPA (widening), SMOPA, UMOPA + +def SVMOPA_ZA32 : SInst<"svmopa_za32[_{d}]", "viPPdd", "cUchb", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in { + def SVMOPA_ZA64 : SInst<"svmopa_za64[_{d}]", "viPPdd", "sUs", MergeNone, "", [IsSME, IsSMEMop]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME - FMOPA (non-widening) + +def SVMOPA_ZA32_F32 : SInst<"svmopa_za32[_{d}]", "viPPdd", "f", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_F64F64)" in { + def SVMOPA_ZA64_F64 : SInst<"svmopa_za64[_{d}]", "viPPdd", "d", MergeNone, "", [IsSME, IsSMEMop]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME - SUMOPA, USMOPA + +def SVSUMOPA_ZA32 : SInst<"svsumopa_za32[_{d}]", "viPPdu", "c", MergeNone, "", [IsSME, IsSMEMop]>; +def SVUSMOPA_ZA32 : SInst<"svusmopa_za32[_{d}]", "viPPdx", "Uc", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in { + def SVSUMOPA_ZA64 : SInst<"svsumopa_za64[_{d}]", "viPPdu", "s", MergeNone, "", [IsSME, 
IsSMEMop]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME - BFMOPS, FMOPS (widening), SMOPS, UMOPS + +def SVMOPS_ZA32 : SInst<"svmops_za32[_{d}]", "viPPdd", "cUchb", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in { + def SVMOPS_ZA64 : SInst<"svmops_za64[_{d}]", "viPPdd", "sUs", MergeNone, "", [IsSME, IsSMEMop]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME - FMOPS (non-widening) + +def SVMOPS_ZA32_F32 : SInst<"svmops_za32[_{d}]", "viPPdd", "f", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_F64F64)" in { + def SVMOPS_ZA64_F64 : SInst<"svmops_za64[_{d}]", "viPPdd", "d", MergeNone, "", [IsSME, IsSMEMop]>; +} + +//////////////////////////////////////////////////////////////////////////////// +// SME - SUMOPS, USMOPS + +def SVSUMOPS_ZA32 : SInst<"svsumops_za32[_{d}]", "viPPdu", "c", MergeNone, "", [IsSME, IsSMEMop]>; +def SVUSMOPS_ZA32 : SInst<"svusmops_za32[_{d}]", "viPPdx", "Uc", MergeNone, "", [IsSME, IsSMEMop]>; +let ArchGuard = "defined(__ARM_FEATURE_SME_I16I64)" in { + def SVSUMOPS_ZA64 : SInst<"svsumops_za64[_{d}]", "viPPdu", "s", MergeNone, "", [IsSME, IsSMEMop]>; + def SVUSMOPS_ZA64 : SInst<"svusmops_za64[_{d}]", "viPPdx", "Us", MergeNone, "", [IsSME, IsSMEMop]>; +} diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h --- a/clang/lib/Basic/Targets/AArch64.h +++ b/clang/lib/Basic/Targets/AArch64.h @@ -57,6 +57,7 @@ bool HasRCPC; bool HasSME; bool HasSMEI64; + bool HasSMEF64; llvm::AArch64::ArchKind ArchKind; diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp --- a/clang/lib/Basic/Targets/AArch64.cpp +++ b/clang/lib/Basic/Targets/AArch64.cpp @@ -414,6 +414,9 @@ if (HasSME && HasSMEI64) Builder.defineMacro("__ARM_FEATURE_SME_I16I64", "1"); + if (HasSME && HasSMEF64) + Builder.defineMacro("__ARM_FEATURE_SME_F64F64", "1"); + if 
(Opts.hasSignReturnAddress()) { // Bitmask: // 0: Protection using the A key @@ -561,6 +564,7 @@ HasRCPC = false; HasSME = false; HasSMEI64 = false; + HasSMEF64 = false; ArchKind = llvm::AArch64::ArchKind::INVALID; @@ -578,6 +582,12 @@ HasBFloat16 = true; HasFullFP16 = true; } + if (Feature == "+sme-f64") { + HasSME = true; + HasSMEF64 = true; + HasBFloat16 = true; + HasFullFP16 = true; + } if (Feature == "+sve") { FPU |= SveMode; HasFullFP16 = true; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9160,6 +9160,70 @@ return Builder.CreateCall(F, Ops); } +Value *CodeGenFunction::EmitSMEMOP(SVETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned BuiltinID) { + auto *DefaultType = getSVEType(TypeFlags); + Ops[0] = Builder.CreateIntCast(Ops[0], Int64Ty, false); + Ops[1] = EmitSVEPredicateCast(Ops[1], DefaultType); + Ops[2] = EmitSVEPredicateCast(Ops[2], DefaultType); + unsigned IntID; + switch (BuiltinID) { + default: + return nullptr; + + case SVE::BI__builtin_sve_svmopa_za32_f16: + case SVE::BI__builtin_sve_svmopa_za32_bf16: + IntID = Intrinsic::aarch64_sme_mopa_wide; + break; + case SVE::BI__builtin_sve_svmopa_za32_s8: + case SVE::BI__builtin_sve_svmopa_za64_s16: + IntID = Intrinsic::aarch64_sme_smopa_wide; + break; + case SVE::BI__builtin_sve_svmopa_za32_u8: + case SVE::BI__builtin_sve_svmopa_za64_u16: + IntID = Intrinsic::aarch64_sme_umopa_wide; + break; + case SVE::BI__builtin_sve_svmopa_za32_f32: + case SVE::BI__builtin_sve_svmopa_za64_f64: + IntID = Intrinsic::aarch64_sme_mopa; + break; + case SVE::BI__builtin_sve_svsumopa_za32_s8: + case SVE::BI__builtin_sve_svsumopa_za64_s16: + IntID = Intrinsic::aarch64_sme_sumopa_wide; + break; + case SVE::BI__builtin_sve_svusmopa_za32_u8: + case SVE::BI__builtin_sve_svusmopa_za64_u16: + IntID = Intrinsic::aarch64_sme_usmopa_wide; + break; + case SVE::BI__builtin_sve_svmops_za32_f16: + case 
SVE::BI__builtin_sve_svmops_za32_bf16: + IntID = Intrinsic::aarch64_sme_mops_wide; + break; + case SVE::BI__builtin_sve_svmops_za32_s8: + case SVE::BI__builtin_sve_svmops_za64_s16: + IntID = Intrinsic::aarch64_sme_smops_wide; + break; + case SVE::BI__builtin_sve_svmops_za32_u8: + case SVE::BI__builtin_sve_svmops_za64_u16: + IntID = Intrinsic::aarch64_sme_umops_wide; + break; + case SVE::BI__builtin_sve_svmops_za32_f32: + case SVE::BI__builtin_sve_svmops_za64_f64: + IntID = Intrinsic::aarch64_sme_mops; + break; + case SVE::BI__builtin_sve_svsumops_za32_s8: + case SVE::BI__builtin_sve_svsumops_za64_s16: + IntID = Intrinsic::aarch64_sme_sumops_wide; + break; + case SVE::BI__builtin_sve_svusmops_za32_u8: + case SVE::BI__builtin_sve_svusmops_za64_u16: + IntID = Intrinsic::aarch64_sme_usmops_wide; + break; + } + Function *F = CGM.getIntrinsic(IntID, {DefaultType}); + return Builder.CreateCall(F, Ops); +} // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. 
Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9376,6 +9440,33 @@ default: return nullptr; + case SVE::BI__builtin_sve_svmopa_za32_s8: + case SVE::BI__builtin_sve_svmopa_za32_u8: + case SVE::BI__builtin_sve_svmopa_za32_f16: + case SVE::BI__builtin_sve_svmopa_za32_bf16: + case SVE::BI__builtin_sve_svmopa_za32_f32: + case SVE::BI__builtin_sve_svmopa_za64_u16: + case SVE::BI__builtin_sve_svmopa_za64_s16: + case SVE::BI__builtin_sve_svmopa_za64_f64: + case SVE::BI__builtin_sve_svsumopa_za32_s8: + case SVE::BI__builtin_sve_svusmopa_za32_u8: + case SVE::BI__builtin_sve_svsumopa_za64_s16: + case SVE::BI__builtin_sve_svusmopa_za64_u16: + case SVE::BI__builtin_sve_svmops_za32_s8: + case SVE::BI__builtin_sve_svmops_za32_u8: + case SVE::BI__builtin_sve_svmops_za32_f16: + case SVE::BI__builtin_sve_svmops_za32_bf16: + case SVE::BI__builtin_sve_svmops_za32_f32: + case SVE::BI__builtin_sve_svmops_za64_u16: + case SVE::BI__builtin_sve_svmops_za64_s16: + case SVE::BI__builtin_sve_svmops_za64_f64: + case SVE::BI__builtin_sve_svsumops_za32_s8: + case SVE::BI__builtin_sve_svusmops_za32_u8: + case SVE::BI__builtin_sve_svsumops_za64_s16: + case SVE::BI__builtin_sve_svusmops_za64_u16: { + return EmitSMEMOP(TypeFlags, Ops, BuiltinID); + } + case SVE::BI__builtin_sve_svmov_b_z: { // svmov_b_z(pg, op) <=> svand_b_z(pg, op, op) SVETypeFlags TypeFlags(Builtin->TypeModifier); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4249,6 +4249,9 @@ llvm::Value *EmitSMEAdd(SVETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMEMOP(SVETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned BuiltinID); llvm::Value *EmitSVEPrefetchLoad(const SVETypeFlags &TypeFlags, SmallVectorImpl &Ops, unsigned BuiltinID); diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c 
b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za32.c @@ -0,0 +1,125 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svmopa_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmopa_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) 
+// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + SME_ACLE_FUNC(svmopa_za32, _s8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmopa_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + SME_ACLE_FUNC(svmopa_za32, _u8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_za32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmopa_za32_bf16u10__SVBool_tu10__SVBool_tu14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8bf16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + SME_ACLE_FUNC(svmopa_za32, _bf16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: 
@test_svmopa_za32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za32_f16u10__SVBool_tu10__SVBool_tu13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.wide.nxv8f16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { + SME_ACLE_FUNC(svmopa_za32, _f16,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za32_f32u10__SVBool_tu10__SVBool_tu13__SVFloat32_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv4f32(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void 
test_svmopa_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + SME_ACLE_FUNC(svmopa_za32, _f32,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svsumopa_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svsumopa_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svsumopa_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) { + SME_ACLE_FUNC(svsumopa_za32, _s8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svusmopa_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svusmopa_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svusmopa_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) { + SME_ACLE_FUNC(svusmopa_za32, _u8,)(0, pn, pm, zn, zm); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mopa-za64.c @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature 
+sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svmopa_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( 
[[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smopa.wide.nxv8i16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + SME_ACLE_FUNC(svmopa_za64, _s16,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umopa.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + SME_ACLE_FUNC(svmopa_za64, _u16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmopa_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmopa_za64_f64u10__SVBool_tu10__SVBool_tu13__SVFloat64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail 
call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mopa.nxv2f64(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmopa_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + SME_ACLE_FUNC(svmopa_za64, _f64,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svsumopa_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svsumopa_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumopa.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svsumopa_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) { + SME_ACLE_FUNC(svsumopa_za64, _s16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svusmopa_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// 
CPP-CHECK-LABEL: @_Z22test_svusmopa_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmopa.wide.nxv8i16(i64 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svusmopa_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) { + SME_ACLE_FUNC(svusmopa_za64, _u16,)(2, pn, pm, zn, zm); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za32.c @@ -0,0 +1,125 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: @test_svmops_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmops_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svint8_t zm) { + SME_ACLE_FUNC(svmops_za32, _s8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z19test_svmops_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svuint8_t zm) { + SME_ACLE_FUNC(svmops_za32, _u8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za32_bf16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], 
[[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svmops_za32_bf16u10__SVBool_tu10__SVBool_tu14__SVBFloat16_tu14__SVBFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8bf16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za32_bf16(svbool_t pn, svbool_t pm, svbfloat16_t zn, svbfloat16_t zm) { + SME_ACLE_FUNC(svmops_za32, _bf16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za32_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za32_f16u10__SVBool_tu10__SVBool_tu13__SVFloat16_tu13__SVFloat16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.wide.nxv8f16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za32_f16(svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) { + SME_ACLE_FUNC(svmops_za32, _f16,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = 
tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za32_f32u10__SVBool_tu10__SVBool_tu13__SVFloat32_tu13__SVFloat32_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv4i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv4f32(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za32_f32(svbool_t pn, svbool_t pm, svfloat32_t zn, svfloat32_t zm) { + SME_ACLE_FUNC(svmops_za32, _f32,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svsumops_za32_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svsumops_za32_s8u10__SVBool_tu10__SVBool_tu10__SVInt8_tu11__SVUint8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svsumops_za32_s8(svbool_t pn, svbool_t pm, svint8_t zn, svuint8_t zm) { + SME_ACLE_FUNC(svsumops_za32, _s8,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svusmops_za32_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 0, [[PN:%.*]], [[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z21test_svusmops_za32_u8u10__SVBool_tu10__SVBool_tu11__SVUint8_tu10__SVInt8_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv16i8(i64 0, [[PN:%.*]], 
[[PM:%.*]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svusmops_za32_u8(svbool_t pn, svbool_t pm, svuint8_t zn, svint8_t zm) { + SME_ACLE_FUNC(svusmops_za32, _u8,)(0, pn, pm, zn, zm); +} diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_mops-za64.c @@ -0,0 +1,105 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -DSME_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme-f64 -target-feature +sme-i64 -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -o /dev/null %s + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// 
CHECK-LABEL: @test_svmops_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.smops.wide.nxv8i16(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svint16_t zm) { + SME_ACLE_FUNC(svmops_za64, _s16,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.umops.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// 
+__attribute__((arm_streaming)) void test_svmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svuint16_t zm) { + SME_ACLE_FUNC(svmops_za64, _u16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svmops_za64_f64( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z20test_svmops_za64_f64u10__SVBool_tu10__SVBool_tu13__SVFloat64_tu13__SVFloat64_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv2i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.mops.nxv2f64(i64 1, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svmops_za64_f64(svbool_t pn, svbool_t pm, svfloat64_t zn, svfloat64_t zm) { + SME_ACLE_FUNC(svmops_za64, _f64,)(1, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svsumops_za64_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svsumops_za64_s16u10__SVBool_tu10__SVBool_tu11__SVInt16_tu12__SVUint16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) 
+// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.sumops.wide.nxv8i16(i64 0, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svsumops_za64_s16(svbool_t pn, svbool_t pm, svint16_t zn, svuint16_t zm) { + SME_ACLE_FUNC(svsumops_za64, _s16,)(0, pn, pm, zn, zm); +} + +// CHECK-LABEL: @test_svusmops_za64_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CHECK-NEXT: ret void +// +// CPP-CHECK-LABEL: @_Z22test_svusmops_za64_u16u10__SVBool_tu10__SVBool_tu12__SVUint16_tu11__SVInt16_t( +// CPP-CHECK-NEXT: entry: +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PN:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( [[PM:%.*]]) +// CPP-CHECK-NEXT: tail call void @llvm.aarch64.sme.usmops.wide.nxv8i16(i64 2, [[TMP0]], [[TMP1]], [[ZN:%.*]], [[ZM:%.*]]) +// CPP-CHECK-NEXT: ret void +// +__attribute__((arm_streaming)) void test_svusmops_za64_u16(svbool_t pn, svbool_t pm, svuint16_t zn, svint16_t zm) { + SME_ACLE_FUNC(svusmops_za64, _u16,)(2, pn, pm, zn, zm); +} diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -881,6 +881,8 @@ this->SMEAttributes = "arm_streaming_compatible, arm_preserves_za"; else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEAdd")) this->SMEAttributes = "arm_streaming, arm_shared_za"; + else if (this->Flags & Emitter.getEnumValueForFlag("IsSMEMop")) + this->SMEAttributes = "arm_streaming, arm_shared_za"; else llvm_unreachable("Unknown SME instruction"); } else