diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -903,7 +903,9 @@ defm SVCLS : SInstCLS<"svcls", "csil", "aarch64_sve_cls">; defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">; defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">; - +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt">; +} //////////////////////////////////////////////////////////////////////////////// // Conversion @@ -1153,6 +1155,9 @@ def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">; def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">; def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">; +let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in { + def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">; +} def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">; def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">; def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">; @@ -1920,6 +1925,11 @@ def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">; } +let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in { +def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone>; +def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx">; +} + //////////////////////////////////////////////////////////////////////////////// // SVE2 - Optional diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -8462,6 +8462,7 @@ case SVE::BI__builtin_sve_svtbl2_u64: case SVE::BI__builtin_sve_svtbl2_s64: case SVE::BI__builtin_sve_svtbl2_f16: + case SVE::BI__builtin_sve_svtbl2_bf16: case SVE::BI__builtin_sve_svtbl2_f32: case SVE::BI__builtin_sve_svtbl2_f64: { SVETypeFlags TF(Builtin->TypeModifier); diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c @@ -0,0 +1,44 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t +// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +// ASM-NOT: warning +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_z + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8bf16( zeroinitializer, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_z'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op); +} + +svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_m + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8bf16( %inactive, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op); +} +svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) { + // CHECK-LABEL: test_svcnt_bf16_x + // CHECK: %[[PG:.*]] = call @llvm.aarch64.sve.convert.from.svbool.nxv8i1( %pg) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.cnt.nxv8bf16( undef, %[[PG]], %op) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}} + return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op); +} diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c @@ -0,0 +1,26 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t +// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s + +// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it. +// ASM-NOT: warning +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) { + // CHECK-LABEL: test_svtbl_bf16 + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl.nxv8bf16( %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}} + return SVE_ACLE_FUNC(svtbl, _bf16, , )(data, indices); +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c @@ -0,0 +1,25 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) { + // CHECK-LABEL: test_svtbl2_bf16 + // CHECK-DAG: %[[V0:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %data, i32 0) + // CHECK-DAG: %[[V1:.*]] = call @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16( %data, i32 1) + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbl2.nxv8bf16( %[[V0]], %[[V1]], %indices) + // CHECK-NEXT: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbl2'}} + // expected-warning@+1 {{implicit declaration of function 'svtbl2_bf16'}} + return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices); +} diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c @@ -0,0 +1,23 @@ +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s + +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) { + // CHECK-LABEL: @test_svtbx_bf16( + // CHECK: %[[INTRINSIC:.*]] = call @llvm.aarch64.sve.tbx.nxv8bf16( %fallback, %data, %indices) + // CHECK: ret %[[INTRINSIC]] + // overload-warning@+2 {{implicit declaration of function 'svtbx'}} + // expected-warning@+1 {{implicit declaration of function 'svtbx_bf16'}} + return SVE_ACLE_FUNC(svtbx, _bf16, , )(fallback, data, indices); +} diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -1020,6 +1020,8 @@ def : SVE_2_Op_Pat(NAME # _H)>; def : SVE_2_Op_Pat(NAME # _S)>; def : SVE_2_Op_Pat(NAME # _D)>; + + def : SVE_2_Op_Pat(NAME # _H)>; } multiclass sve2_int_perm_tbl { @@ -1053,6 +1055,11 @@ nxv8f16:$Op2, zsub1), nxv8i16:$Op3))>; + def : Pat<(nxv8bf16 (op nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)), + (nxv8bf16 (!cast(NAME # _H) (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, + nxv8bf16:$Op2, zsub1), + nxv8i16:$Op3))>; + def : Pat<(nxv4f32 (op nxv4f32:$Op1, nxv4f32:$Op2, nxv4i32:$Op3)), (nxv4f32 (!cast(NAME # _S) (REG_SEQUENCE ZPR2, nxv4f32:$Op1, zsub0, nxv4f32:$Op2, zsub1), @@ -1097,6 +1104,8 @@ def : SVE_3_Op_Pat(NAME # _H)>; def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; + + def : SVE_3_Op_Pat(NAME # _H)>; } class sve_int_perm_reverse_z sz8_64, string asm, ZPRRegOp zprty> @@ -3685,9 +3694,10 @@ def : SVE_3_Op_Pat(NAME # _S)>; def : SVE_3_Op_Pat(NAME # _D)>; - def : SVE_3_Op_Pat(NAME # _H)>; - def : SVE_3_Op_Pat(NAME # _S)>; - def : SVE_3_Op_Pat(NAME # _D)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _H)>; + def : SVE_3_Op_Pat(NAME # _S)>; + def : SVE_3_Op_Pat(NAME # _D)>; } multiclass sve_int_un_pred_arit_1_fp opc, string asm, diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll @@ -145,6 +145,16 @@ ret %out } +define @cnt_bf16( %a, %pg, %b) { +; CHECK-LABEL: cnt_bf16: +; CHECK: cnt z0.h, p0/m, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.cnt.nxv8bf16( %a, + %pg, + %b) + ret %out +} + define @cnt_f32( %a, %pg, %b) { ; CHECK-LABEL: cnt_f32: ; CHECK: cnt z0.s, p0/m, z1.s @@ -180,5 +190,6 @@ declare @llvm.aarch64.sve.cnt.nxv4i32(, , ) declare @llvm.aarch64.sve.cnt.nxv2i64(, , ) declare @llvm.aarch64.sve.cnt.nxv8f16(, , ) +declare @llvm.aarch64.sve.cnt.nxv8bf16(, , ) declare @llvm.aarch64.sve.cnt.nxv4f32(, , ) declare @llvm.aarch64.sve.cnt.nxv2f64(, , ) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -1009,6 +1009,15 @@ ret %out } +define @tbl_bf16( %a, %b) { +; CHECK-LABEL: tbl_bf16: +; CHECK: tbl z0.h, { z0.h }, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.tbl.nxv8bf16( %a, + %b) + ret %out +} + define @tbl_f32( %a, %b) { ; CHECK-LABEL: tbl_f32: ; CHECK: tbl z0.s, { z0.s }, z1.s @@ -1859,6 +1868,7 @@ declare @llvm.aarch64.sve.tbl.nxv4i32(, ) declare @llvm.aarch64.sve.tbl.nxv2i64(, ) declare @llvm.aarch64.sve.tbl.nxv8f16(, ) +declare @llvm.aarch64.sve.tbl.nxv8bf16(, ) declare @llvm.aarch64.sve.tbl.nxv4f32(, ) declare @llvm.aarch64.sve.tbl.nxv2f64(, ) diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -122,6 +122,16 @@ ret %out } +define @ftbx_h_bf16( %a, %b, %c) { +; CHECK-LABEL: ftbx_h_bf16: +; CHECK: tbx z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.tbx.nxv8bf16( %a, + %b, + %c) + ret %out +} + define @tbx_s( %a, %b, %c) { ; CHECK-LABEL: tbx_s: ; CHECK: tbx z0.s, z1.s, z2.s @@ -179,3 +189,5 @@ declare @llvm.aarch64.sve.tbx.nxv8f16(, , ) declare @llvm.aarch64.sve.tbx.nxv4f32(, , ) declare @llvm.aarch64.sve.tbx.nxv2f64(, , ) + +declare @llvm.aarch64.sve.tbx.nxv8bf16(, , )