diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -365,6 +365,7 @@ bool isLoad() const { return Flags & IsLoad; } bool isStore() const { return Flags & IsStore; } bool isMove() const { return Flags & IsMove; } + bool isZero() const { return Flags & IsZero; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -92,3 +92,9 @@ def SVWRITE_VER_ZA32 : SInst<"svwrite_ver_za32[_{d}]", "vimiPd", "iUif", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; def SVWRITE_VER_ZA64 : SInst<"svwrite_ver_za64[_{d}]", "vimiPd", "lUld", MergeOp1, "aarch64_sme_write_vert", [IsMove, IsStreaming, IsSharedZA]>; def SVWRITE_VER_ZA128 : SInst<"svwrite_ver_za128[_{d}]", "vimiPd", "csilUcUsUiUlhbfd", MergeOp1, "aarch64_sme_writeq_vert", [IsMove, IsStreaming, IsSharedZA]>; + +//////////////////////////////////////////////////////////////////////////////// +// SME - Zero + +def SVZERO_MASK_ZA : SInst<"svzero_mask_za", "vi", "", MergeNone, "aarch64_sme_zero", [IsZero, IsOverloadNone, IsStreamingCompatible, IsSharedZA]>; +def SVZERO_ZA : SInst<"svzero_za", "v", "", MergeNone, "aarch64_sme_zero", [IsZero, IsOverloadNone, IsStreamingCompatible, IsSharedZA]>; diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -216,6 +216,7 @@ def IsSharedZA : FlagType<0x2000000000>; def IsPreservesZA : FlagType<0x4000000000>; def IsMove : FlagType<0x8000000000>; +def IsZero : FlagType<0x10000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -9438,6 +9438,16 @@ return Builder.CreateCall(F, Ops); } +Value *CodeGenFunction::EmitSMEZero(SMETypeFlags TypeFlags, + SmallVectorImpl &Ops, + unsigned IntID) { + // svzero_za() intrinsic zeros the entire za tile and has no paramters. + if (Ops.size() == 0) + Ops.push_back(llvm::ConstantInt::get(Int32Ty, 255)); + Function *F = CGM.getIntrinsic(IntID, {}); + return Builder.CreateCall(F, Ops); +} + // Limit the usage of scalable llvm IR generated by the ACLE by using the // sve dup.x intrinsic instead of IRBuilder::CreateVectorSplat. Value *CodeGenFunction::EmitSVEDupX(Value *Scalar, llvm::Type *Ty) { @@ -9894,6 +9904,8 @@ return EmitSMELd1St1(TypeFlags, Ops, Builtin->LLVMIntrinsic); else if (TypeFlags.isMove()) return EmitSMEReadWrite(TypeFlags, Ops, Builtin->LLVMIntrinsic); + else if (TypeFlags.isZero()) + return EmitSMEZero(TypeFlags, Ops, Builtin->LLVMIntrinsic); /// Should not happen return nullptr; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -4256,6 +4256,9 @@ llvm::Value *EmitSMEReadWrite(SMETypeFlags TypeFlags, llvm::SmallVectorImpl &Ops, unsigned IntID); + llvm::Value *EmitSMEZero(SMETypeFlags TypeFlags, + llvm::SmallVectorImpl &Ops, + unsigned IntID); llvm::Value *EmitAArch64SMEBuiltinExpr(unsigned BuiltinID, const CallExpr *E); llvm::Value *EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, diff --git a/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme-intrinsics/acle_sme_zero.c @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefixes=CHECK,CHECK-CXX +// RUN: %clang_cc1 -D__ARM_FEATURE_SME -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -S -O1 -Werror -o /dev/null %s + +#include + +// CHECK-C-LABEL: @test_svzero_mask_za( +// CHECK-CXX-LABEL: @_Z19test_svzero_mask_zav( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 0) +// CHECK-NEXT: ret void +// +void test_svzero_mask_za() { + svzero_mask_za(0); +} + +// CHECK-C-LABEL: @test_svzero_mask_za_1( +// CHECK-CXX-LABEL: @_Z21test_svzero_mask_za_1v( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 176) +// CHECK-NEXT: ret void +// +void test_svzero_mask_za_1() { + svzero_mask_za(176); +} + +// CHECK-C-LABEL: @test_svzero_mask_za_2( +// CHECK-CXX-LABEL: @_Z21test_svzero_mask_za_2v( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CHECK-NEXT: ret void +// +void test_svzero_mask_za_2() { + svzero_mask_za(255); +} + +// CHECK-C-LABEL: @test_svzero_za( +// CHECK-CXX-LABEL: @_Z14test_svzero_zav( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.aarch64.sme.zero(i32 255) +// CHECK-NEXT: ret void +// +void test_svzero_za() { + svzero_za(); +}