diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -928,6 +928,10 @@
 defm SVCLZ : SInstCLS<"svclz", "csilUcUsUiUl", "aarch64_sve_clz">;
 defm SVCNT : SInstCLS<"svcnt", "csilUcUsUiUlhfd", "aarch64_sve_cnt">;
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  defm SVCNT_BF16 : SInstCLS<"svcnt", "b", "aarch64_sve_cnt">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Conversion
 
@@ -1177,6 +1181,11 @@
 def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">;
 def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">;
 def SVTBL : SInst<"svtbl[_{d}]", "ddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
+
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  def SVTBL_BF16 : SInst<"svtbl[_{d}]", "ddu", "b", MergeNone, "aarch64_sve_tbl">;
+}
+
 def SVTRN1 : SInst<"svtrn1[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn1">;
 def SVTRN2 : SInst<"svtrn2[_{d}]", "ddd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_trn2">;
 def SVUNPKHI_S : SInst<"svunpkhi[_{d}]", "dh", "sil", MergeNone, "aarch64_sve_sunpkhi">;
@@ -1974,6 +1983,11 @@
 def SVTBX : SInst<"svtbx[_{d}]", "dddu", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbx">;
 }
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVTBL2_BF16 : SInst<"svtbl2[_{d}]", "d2u", "b", MergeNone>;
+def SVTBX_BF16 : SInst<"svtbx[_{d}]", "dddu", "b", MergeNone, "aarch64_sve_tbx">;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Optional
 
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8465,6 +8465,7 @@
   case SVE::BI__builtin_sve_svtbl2_u64:
   case SVE::BI__builtin_sve_svtbl2_s64:
   case SVE::BI__builtin_sve_svtbl2_f16:
+  case SVE::BI__builtin_sve_svtbl2_bf16:
   case SVE::BI__builtin_sve_svtbl2_f32:
   case SVE::BI__builtin_sve_svtbl2_f64: {
     SVETypeFlags TF(Builtin->TypeModifier);
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_cnt-bfloat.c
@@ -0,0 +1,44 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svuint16_t test_svcnt_bf16_z(svbool_t pg, svbfloat16_t op) {
+  // CHECK-LABEL: test_svcnt_bf16_z
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_z'}}
+  return SVE_ACLE_FUNC(svcnt, _bf16, _z, )(pg, op);
+}
+
+svuint16_t test_svcnt_bf16_m(svuint16_t inactive, svbool_t pg, svbfloat16_t op) {
+  // CHECK-LABEL: test_svcnt_bf16_m
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %inactive, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_m'}}
+  return SVE_ACLE_FUNC(svcnt, _bf16, _m, )(inactive, pg, op);
+}
+svuint16_t test_svcnt_bf16_x(svbool_t pg, svbfloat16_t op) {
+  // CHECK-LABEL: test_svcnt_bf16_x
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+  // CHECK: ret <vscale x 8 x i16> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svcnt_bf16_x'}}
+  return SVE_ACLE_FUNC(svcnt, _bf16, _x, )(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_tbl-bfloat.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svtbl_bf16(svbfloat16_t data, svuint16_t indices) {
+  // CHECK-LABEL: test_svtbl_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl.nxv8bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i16> %indices)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svtbl_bf16'}}
+  return SVE_ACLE_FUNC(svtbl, _bf16, , )(data, indices);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbl2-bfloat.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svtbl2_bf16(svbfloat16x2_t data, svuint16_t indices) {
+  // CHECK-LABEL: test_svtbl2_bf16
+  // CHECK-DAG: %[[V0:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %data, i32 0)
+  // CHECK-DAG: %[[V1:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tuple.get.nxv8bf16.nxv16bf16(<vscale x 16 x bfloat> %data, i32 1)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbl2.nxv8bf16(<vscale x 8 x bfloat> %[[V0]], <vscale x 8 x bfloat> %[[V1]], <vscale x 8 x i16> %indices)
+  // CHECK-NEXT: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svtbl2'}}
+  // expected-warning@+1 {{implicit declaration of function 'svtbl2_bf16'}}
+  return SVE_ACLE_FUNC(svtbl2, _bf16, , )(data, indices);
+}
diff --git a/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_tbx-bfloat.c
@@ -0,0 +1,24 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +bf16 -target-feature +sve -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svtbx_bf16(svbfloat16_t fallback, svbfloat16_t data, svuint16_t indices) {
+  // CHECK-LABEL: @test_svtbx_bf16(
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.tbx.nxv8bf16(<vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data, <vscale x 8 x i16> %indices)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // overload-warning@+2 {{implicit declaration of function 'svtbx'}}
+  // expected-warning@+1 {{implicit declaration of function 'svtbx_bf16'}}
+  return SVE_ACLE_FUNC(svtbx, _bf16, , )(fallback, data, indices);
+}
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -284,6 +284,11 @@
   defm CLS_ZPmZ  : sve_int_un_pred_arit_1<   0b000, "cls",  int_aarch64_sve_cls>;
   defm CLZ_ZPmZ  : sve_int_un_pred_arit_1<   0b001, "clz",  int_aarch64_sve_clz>;
   defm CNT_ZPmZ  : sve_int_un_pred_arit_1<   0b010, "cnt",  int_aarch64_sve_cnt>;
+
+  let Predicates = [HasSVE, HasBF16] in {
+  def : SVE_3_Op_Pat<nxv8i16, int_aarch64_sve_cnt, nxv8i16, nxv8i1, nxv8bf16, !cast<Instruction>(CNT_ZPmZ_H)>;
+  }
+
   defm CNOT_ZPmZ : sve_int_un_pred_arit_1<   0b011, "cnot", int_aarch64_sve_cnot>;
   defm NOT_ZPmZ  : sve_int_un_pred_arit_1<   0b110, "not",  int_aarch64_sve_not>;
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
@@ -992,6 +997,10 @@
   defm TBL_ZZZ  : sve_int_perm_tbl<"tbl", AArch64tbl>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+  def : SVE_2_Op_Pat<nxv8bf16, AArch64tbl, nxv8bf16, nxv8i16, TBL_ZZZ_H>;
+  }
+
   defm ZIP1_ZZZ : sve_int_perm_bin_perm_zz<0b000, "zip1", AArch64zip1>;
   defm ZIP2_ZZZ : sve_int_perm_bin_perm_zz<0b001, "zip2", AArch64zip2>;
   defm UZP1_ZZZ : sve_int_perm_bin_perm_zz<0b010, "uzp1", AArch64uzp1>;
@@ -2347,6 +2356,13 @@
   defm TBL_ZZZZ : sve2_int_perm_tbl<"tbl", int_aarch64_sve_tbl2>;
   defm TBX_ZZZ  : sve2_int_perm_tbx<"tbx", int_aarch64_sve_tbx>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+  def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_tbx, nxv8bf16, nxv8bf16, nxv8i16, TBX_ZZZ_H>;
+  def : Pat<(nxv8bf16 (int_aarch64_sve_tbl2 nxv8bf16:$Op1, nxv8bf16:$Op2, nxv8i16:$Op3)),
+            (nxv8bf16 (TBL_ZZZZ_H (REG_SEQUENCE ZPR2, nxv8bf16:$Op1, zsub0, nxv8bf16:$Op2, zsub1),
+                       nxv8i16:$Op3))>;
+  }
+
   // SVE2 integer compare scalar count and limit
   defm WHILEGE_PWW : sve_int_while4_rr<0b000, "whilege", int_aarch64_sve_whilege>;
   defm WHILEGT_PWW : sve_int_while4_rr<0b001, "whilegt", int_aarch64_sve_whilegt>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-bits.ll
@@ -145,6 +145,16 @@
   ret <vscale x 8 x i16> %out
 }
 
+define <vscale x 8 x i16> @cnt_bf16(<vscale x 8 x i16> %a, <vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: cnt_bf16:
+; CHECK: cnt z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16> %a,
+                                                                <vscale x 8 x i1> %pg,
+                                                                <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x i16> %out
+}
+
 define <vscale x 4 x i32> @cnt_f32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: cnt_f32:
 ; CHECK: cnt z0.s, p0/m, z1.s
@@ -180,5 +190,9 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8f16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x half>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.cnt.nxv8bf16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.cnt.nxv4f32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x float>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.cnt.nxv2f64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x double>)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -1027,6 +1027,15 @@ ret %out } +define @tbl_bf16( %a, %b) #0 { +; CHECK-LABEL: tbl_bf16: +; CHECK: tbl z0.h, { z0.h }, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.tbl.nxv8bf16( %a, + %b) + ret %out +} + define @tbl_f32( %a, %b) { ; CHECK-LABEL: tbl_f32: ; CHECK: tbl z0.s, { z0.s }, z1.s @@ -1933,6 +1942,7 @@ declare @llvm.aarch64.sve.tbl.nxv4i32(, ) declare @llvm.aarch64.sve.tbl.nxv2i64(, ) declare @llvm.aarch64.sve.tbl.nxv8f16(, ) +declare @llvm.aarch64.sve.tbl.nxv8bf16(, ) declare @llvm.aarch64.sve.tbl.nxv4f32(, ) declare @llvm.aarch64.sve.tbl.nxv2f64(, ) @@ -2027,3 +2037,6 @@ declare @llvm.aarch64.sve.zip2.nxv8f16(, ) declare @llvm.aarch64.sve.zip2.nxv4f32(, ) declare @llvm.aarch64.sve.zip2.nxv2f64(, ) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" } diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-perm-tb.ll @@ -122,6 +122,16 @@ ret %out } +define @ftbx_h_bf16( %a, %b, %c) #0 { +; CHECK-LABEL: ftbx_h_bf16: +; CHECK: tbx z0.h, z1.h, z2.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.tbx.nxv8bf16( %a, + %b, + %c) + ret %out +} + define @tbx_s( %a, %b, %c) { ; CHECK-LABEL: tbx_s: ; CHECK: tbx z0.s, z1.s, z2.s @@ -179,3 +189,8 @@ declare @llvm.aarch64.sve.tbx.nxv8f16(, , ) declare @llvm.aarch64.sve.tbx.nxv4f32(, , ) declare @llvm.aarch64.sve.tbx.nxv2f64(, , ) + +declare @llvm.aarch64.sve.tbx.nxv8bf16(, , ) + +; +bf16 is required for the bfloat version. +attributes #0 = { "target-features"="+sve,+bf16" }