diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -1959,6 +1959,12 @@
 def SVDOT_LANE_X2_S : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "i", MergeNone, "aarch64_sve_sdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_U : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "Ui", MergeNone, "aarch64_sve_udot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
 def SVDOT_LANE_X2_F : SInst<"svdot_lane[_{d}_{2}_{3}]", "ddhhi", "f", MergeNone, "aarch64_sve_fdot_lane_x2", [], [ImmCheck<3, ImmCheck0_3>]>;
+
+def SVBFMLSLB : SInst<"svbfmlslb[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslb", [IsOverloadNone], []>;
+def SVBFMLSLT : SInst<"svbfmlslt[_{d}]", "dd$$", "f", MergeNone, "aarch64_sve_bfmlslt", [IsOverloadNone], []>;
+
+def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
+def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone], [ImmCheck<3, ImmCheck0_7>]>;
 }
 
 let TargetGuard = "sve2p1" in {
diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td
--- a/clang/include/clang/Basic/arm_sve_sme_incl.td
+++ b/clang/include/clang/Basic/arm_sve_sme_incl.td
@@ -99,6 +99,7 @@
 // O: svfloat16_t
 // M: svfloat32_t
 // N: svfloat64_t
+// $: svbfloat16_t
 
 // J: Prefetch type (sv_prfop)
 
diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfmlsl.c
@@ -0,0 +1,85 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+// BFMLSLB
+
+
+// CHECK-LABEL: @test_bfmlslb(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlslbu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslb_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_bfmlslb_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslb_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslb_lane,_f32,,)(zda, zn, zm, 7);
+}
+
+// BFMLSLT
+
+
+// CHECK-LABEL: @test_bfmlslt(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z12test_bfmlsltu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]])
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt,_f32,,)(zda, zn, zm);
+}
+
+
+// CHECK-LABEL: @test_bfmlslt_lane(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+// CPP-CHECK-LABEL: @_Z17test_bfmlslt_laneu13__SVFloat32_tu14__SVBFloat16_tu14__SVBFloat16_t(
+// CPP-CHECK-NEXT:  entry:
+// CPP-CHECK-NEXT:    [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> [[ZDA:%.*]], <vscale x 8 x bfloat> [[ZN:%.*]], <vscale x 8 x bfloat> [[ZM:%.*]], i32 7)
+// CPP-CHECK-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+svfloat32_t test_bfmlslt_lane(svfloat32_t zda, svbfloat16_t zn, svbfloat16_t zm)
+{
+  return SVE_ACLE_FUNC(svbfmlslt_lane,_f32,,)(zda, zn, zm, 7);
+}
diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp
--- a/clang/utils/TableGen/SveEmitter.cpp
+++ b/clang/utils/TableGen/SveEmitter.cpp
@@ -852,6 +852,13 @@
     NumVectors = 0;
     Signed = false;
     break;
+  case '$':
+    Predicate = false;
+    Svcount = false;
+    Float = false;
+    BFloat = true;
+    ElementBitwidth = 16;
+    break;
   case '}':
     Predicate = false;
     Signed = true;
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -3011,6 +3011,16 @@
                 [llvm_anyvector_ty, LLVMMatchType<0>,
                  LLVMMatchType<0>, LLVMMatchType<0>],
                 [IntrNoMem]>;
+
+  class SME2_BFMLS_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty],
+                [IntrNoMem]>;
+
+  class SME2_BFMLS_Lane_Intrinsic
+    : DefaultAttrsIntrinsic<[llvm_nxv4f32_ty],
+                [llvm_nxv4f32_ty, llvm_nxv8bf16_ty, llvm_nxv8bf16_ty, llvm_i32_ty],
+                [IntrNoMem, ImmArg<ArgIndex<3>>]>;
 
   class SME2_ZA_ArrayVector_Read_VG2_Intrinsic
     : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>],
                 [llvm_i32_ty],
@@ -3214,6 +3224,12 @@
   def int_aarch64_sme_usmla_za32_lane_vg4x2 : SME2_Matrix_ArrayVector_VG2_Multi_Index_Intrinsic;
   def int_aarch64_sme_usmla_za32_lane_vg4x4 : SME2_Matrix_ArrayVector_VG4_Multi_Index_Intrinsic;
 
+  def int_aarch64_sve_bfmlslb : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslb_lane : SME2_BFMLS_Lane_Intrinsic;
+
+  def int_aarch64_sve_bfmlslt : SME2_BFMLS_Intrinsic;
+  def int_aarch64_sve_bfmlslt_lane : SME2_BFMLS_Lane_Intrinsic;
+
   // Multi-vector signed saturating doubling multiply high
 
   def int_aarch64_sve_sqdmulh_single_vgx2 : SME2_VG2_Multi_Single_Intrinsic;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -3753,12 +3753,14 @@
 let Predicates = [HasSVE2p1_or_HasSME2] in {
 defm FCLAMP_ZZZ : sve2p1_fclamp<"fclamp", int_aarch64_sve_fclamp>;
+
 defm FDOT_ZZZ_S  : sve_float_dot<0b0, 0b0, ZPR32, ZPR16, "fdot", nxv8f16, int_aarch64_sve_fdot_x2>;
 defm FDOT_ZZZI_S : sve_float_dot_indexed<0b0, 0b00, ZPR16, ZPR3b16, "fdot", nxv8f16, int_aarch64_sve_fdot_lane_x2>;
-def BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb">;
-def BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt">;
-def BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb">;
-def BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt">;
+
+defm BFMLSLB_ZZZ_S : sve2_fp_mla_long<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb>;
+defm BFMLSLT_ZZZ_S : sve2_fp_mla_long<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt>;
+defm BFMLSLB_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b110, "bfmlslb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslb_lane>;
+defm BFMLSLT_ZZZI_S : sve2_fp_mla_long_by_indexed_elem<0b111, "bfmlslt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlslt_lane>;
 
 defm SDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"sdot", 0b0, int_aarch64_sve_sdot_x2>;
 defm UDOT_ZZZ_HtoS : sve2p1_two_way_dot_vv<"udot", 0b1, int_aarch64_sve_udot_x2>;
diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-bfmls.ll
@@ -0,0 +1,43 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p1 -mattr=+b16b16 -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x float> @bfmlslb_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslb_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslb_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslb z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+define <vscale x 4 x float> @bfmlslt_lane_f32(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm) {
+; CHECK-LABEL: bfmlslt_lane_f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    bfmlslt z0.s, z1.h, z2.h[7]
+; CHECK-NEXT:    ret
+  %out = call <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float> %zda, <vscale x 8 x bfloat> %zn, <vscale x 8 x bfloat> %zm, i32 7)
+  ret <vscale x 4 x float> %out
+}
+
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslb.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)
+declare <vscale x 4 x float> @llvm.aarch64.sve.bfmlslt.lane(<vscale x 4 x float>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>, i32)