diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -725,13 +725,23 @@
 def SVDUPQ_8 : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>;
 def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  def SVDUPQ_BF16 : SInst<"svdupq[_n]_{d}", "dssssssss", "b", MergeNone>;
+}
 def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>;
 def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>;
-def SVDUP : SInst<"svdup[_n]_{d}", "ds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dup_x">;
-def SVDUP_M : SInst<"svdup[_n]_{d}", "ddPs", "csilUcUsUiUlhfd", MergeOp1, "aarch64_sve_dup">;
-def SVDUP_X : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeAnyExp, "aarch64_sve_dup">;
-def SVDUP_Z : SInst<"svdup[_n]_{d}", "dPs", "csilUcUsUiUlhfd", MergeZeroExp, "aarch64_sve_dup">;
+multiclass svdup_base<string n, string p, MergeType mt, string i> {
+  def NAME : SInst<n, p, "csilUcUsUiUlhfd", mt, i>;
+  let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+    def _BF16: SInst<n, p, "b", mt, i>;
+  }
+}
+
+defm SVDUP : svdup_base<"svdup[_n]_{d}", "ds", MergeNone, "aarch64_sve_dup_x">;
+defm SVDUP_M : svdup_base<"svdup[_n]_{d}", "ddPs", MergeOp1, "aarch64_sve_dup">;
+defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs", MergeAnyExp, "aarch64_sve_dup">;
+defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs", MergeZeroExp, "aarch64_sve_dup">;
 
 def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">;
 
@@ -850,8 +860,11 @@
 def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
 def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
 def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
-def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Integer reductions
@@ -1173,10 +1186,18 @@
 ////////////////////////////////////////////////////////////////////////////////
 // Permutations and selection
 
-def SVCLASTA : SInst<"svclasta[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta">;
-def SVCLASTA_N : SInst<"svclasta[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clasta_n">;
-def SVCLASTB : SInst<"svclastb[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb">;
-def SVCLASTB_N : SInst<"svclastb[_n_{d}]", "sPsd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_clastb_n">;
+multiclass SVEPerm<string name, string proto, string i> {
+  def : SInst<name, proto, "csilUcUsUiUlhfd", MergeNone, i>;
+  let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+    def: SInst<name, proto, "b", MergeNone, i>;
+  }
+}
+
+defm SVCLASTA : SVEPerm<"svclasta[_{d}]", "dPdd", "aarch64_sve_clasta">;
+defm SVCLASTA_N : SVEPerm<"svclasta[_n_{d}]", "sPsd", "aarch64_sve_clasta_n">;
+defm SVCLASTB : SVEPerm<"svclastb[_{d}]", "dPdd", "aarch64_sve_clastb">;
+defm SVCLASTB_N : SVEPerm<"svclastb[_n_{d}]", "sPsd", "aarch64_sve_clastb_n">;
+
 def SVCOMPACT : SInst<"svcompact[_{d}]", "dPd", "ilUiUlfd", MergeNone, "aarch64_sve_compact">;
 // Note: svdup_lane is implemented using the intrinsic for TBL to represent a
 // splat of any possible lane. It is up to LLVM to pick a more efficient
@@ -1184,9 +1205,12 @@
 // instruction's immediate.
 def SVDUP_LANE : SInst<"svdup_lane[_{d}]", "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
 def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16)" in {
+  def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">;
+}
 def SVEXT : SInst<"svext[_{d}]", "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;
-def SVLASTA : SInst<"svlasta[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">;
-def SVLASTB : SInst<"svlastb[_{d}]", "sPd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">;
+defm SVLASTA : SVEPerm<"svlasta[_{d}]", "sPd", "aarch64_sve_lasta">;
+defm SVLASTB : SVEPerm<"svlastb[_{d}]", "sPd", "aarch64_sve_lastb">;
 def SVREV : SInst<"svrev[_{d}]", "dd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_rev">;
 def SVSEL : SInst<"svsel[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_sel">;
 def SVSPLICE : SInst<"svsplice[_{d}]", "dPdd", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_splice">;
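Note: the ArchGuard blocks above mean the bf16 overloads only become visible when the ACLE feature macro is defined. A minimal user-level sketch of what this enables (illustrative function name, not part of the patch; the intrinsic names are the ones exercised by the tests below):

    #include <arm_sve.h>

    #if defined(__ARM_FEATURE_SVE_BF16)
    // svdup_n_bf16 comes from the svdup_base multiclass; svclasta_bf16 from SVEPerm.
    svbfloat16_t pick_or_broadcast(svbool_t pg, bfloat16_t scalar, svbfloat16_t data) {
      svbfloat16_t fallback = svdup_n_bf16(scalar);
      return svclasta_bf16(pg, fallback, data);
    }
    #endif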
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -8386,6 +8386,7 @@
   case SVE::BI__builtin_sve_svdupq_n_s64:
   case SVE::BI__builtin_sve_svdupq_n_u16:
   case SVE::BI__builtin_sve_svdupq_n_f16:
+  case SVE::BI__builtin_sve_svdupq_n_bf16:
   case SVE::BI__builtin_sve_svdupq_n_s16:
   case SVE::BI__builtin_sve_svdupq_n_u32:
   case SVE::BI__builtin_sve_svdupq_n_f32:
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clasta-bfloat.c
@@ -0,0 +1,36 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svclasta_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+  // CHECK-LABEL: test_svclasta_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svclasta_bf16'}}
+  return SVE_ACLE_FUNC(svclasta, _bf16, , )(pg, fallback, data);
+}
+
+bfloat16_t test_svclasta_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+  // CHECK-LABEL: test_svclasta_n_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
+  // CHECK: ret bfloat %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svclasta_n_bf16'}}
+  return SVE_ACLE_FUNC(svclasta, _n_bf16, , )(pg, fallback, data);
+}
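For readers new to these tests: SVE_ACLE_FUNC only token-pastes the intrinsic name, selecting either the overloaded or the fully mangled form. For the first call above it expands as follows (worked example, not extra test content):

    // With -DSVE_OVERLOADED_FORMS: SVE_ACLE_FUNC(svclasta, _bf16, , ) -> svclasta
    // Without it:                  SVE_ACLE_FUNC(svclasta, _bf16, , ) -> svclasta_bf16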
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_clastb-bfloat.c
@@ -0,0 +1,36 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svclastb_bf16(svbool_t pg, svbfloat16_t fallback, svbfloat16_t data) {
+  // CHECK-LABEL: test_svclastb_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %fallback, <vscale x 8 x bfloat> %data)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svclastb_bf16'}}
+  return SVE_ACLE_FUNC(svclastb, _bf16, , )(pg, fallback, data);
+}
+
+bfloat16_t test_svclastb_n_bf16(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
+  // CHECK-LABEL: test_svclastb_n_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat %fallback, <vscale x 8 x bfloat> %data)
+  // CHECK: ret bfloat %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svclastb_n_bf16'}}
+  return SVE_ACLE_FUNC(svclastb, _n_bf16, , )(pg, fallback, data);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup-bfloat.c
@@ -0,0 +1,53 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svdup_n_bf16(bfloat16_t op) {
+  // CHECK-LABEL: test_svdup_n_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %op)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16'}}
+  return SVE_ACLE_FUNC(svdup, _n, _bf16, )(op);
+}
+
+svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op) {
+  // CHECK-LABEL: test_svdup_n_bf16_z
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_z'}}
+  return SVE_ACLE_FUNC(svdup, _n, _bf16_z, )(pg, op);
+}
+
+svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op) {
+  // CHECK-LABEL: test_svdup_n_bf16_m
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_m'}}
+  return SVE_ACLE_FUNC(svdup, _n, _bf16_m, )(inactive, pg, op);
+}
+
+svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op) {
+  // CHECK-LABEL: test_svdup_n_bf16_x
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdup_n_bf16_x'}}
+  return SVE_ACLE_FUNC(svdup, _n, _bf16_x, )(pg, op);
+}
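The three predicated forms above differ only in the first operand of the llvm.aarch64.sve.dup call, which encodes what inactive lanes receive: zeroinitializer (_z), the merge input (_m), or undef (_x). A hedged C sketch of the semantics (illustrative wrapper, not part of the patch):

    #include <arm_sve.h>

    void dup_merge_forms(svbool_t pg, svbfloat16_t inactive, bfloat16_t op) {
      svbfloat16_t z = svdup_n_bf16_z(pg, op);           // inactive lanes are zero
      svbfloat16_t m = svdup_n_bf16_m(inactive, pg, op); // inactive lanes come from 'inactive'
      svbfloat16_t x = svdup_n_bf16_x(pg, op);           // inactive lanes are unspecified
      (void)z; (void)m; (void)x;
    }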
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq-bfloat.c
@@ -0,0 +1,42 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index) {
+  // CHECK-LABEL: test_svdupq_lane_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %data, i64 %index)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svdupq_lane_bf16'}}
+  return SVE_ACLE_FUNC(svdupq_lane, _bf16, , )(data, index);
+}
+svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3,
+                                bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7) {
+  // CHECK-LABEL: test_svdupq_n_bf16
+  // CHECK: %[[ALLOCA:.*]] = alloca [8 x bfloat], align 16
+  // CHECK-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 0
+  // CHECK-DAG: store bfloat %x0, bfloat* %[[BASE]], align 16
+  //
+  // CHECK-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 7
+  // CHECK: store bfloat %x7, bfloat* %[[GEP]], align 2
+  // CHECK-NOT: store
+  // CHECK: call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  // CHECK: %[[LOAD:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %{{.*}}, bfloat* nonnull %[[BASE]])
+  // CHECK: ret <vscale x 8 x bfloat> %[[LOAD]]
+  // expected-warning@+1 {{implicit declaration of function 'svdupq_n_bf16'}}
+  return SVE_ACLE_FUNC(svdupq, _n, _bf16, )(x0, x1, x2, x3, x4, x5, x6, x7);
+}
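The CGBuiltin.cpp change earlier in this patch is what routes svdupq_n_bf16 through the generic svdupq lowering checked above: the eight scalars are spilled to a 16-byte-aligned array and the resulting 128-bit pattern is replicated across the vector with ld1rq. Usage sketch (illustrative function name):

    #include <arm_sve.h>

    // Builds the quadword {a, b, a, b, a, b, a, b} and repeats it across the
    // whole scalable vector.
    svbfloat16_t interleave_pair(bfloat16_t a, bfloat16_t b) {
      return svdupq_n_bf16(a, b, a, b, a, b, a, b);
    }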
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr-bfloat.c
@@ -0,0 +1,26 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2) {
+  // CHECK-LABEL: test_svinsr_n_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %op1, bfloat %op2)
+  // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svinsr_n_bf16'}}
+  return SVE_ACLE_FUNC(svinsr, _n_bf16, , )(op1, op2);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lasta-bfloat.c
@@ -0,0 +1,27 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+bfloat16_t test_svlasta_bf16(svbool_t pg, svbfloat16_t op) {
+  // CHECK-LABEL: test_svlasta_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+  // CHECK: ret bfloat %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svlasta_bf16'}}
+  return SVE_ACLE_FUNC(svlasta, _bf16, , )(pg, op);
+}
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
new file
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_lastb-bfloat.c
@@ -0,0 +1,27 @@
+// REQUIRES: aarch64-registered-target
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
+// RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error -verify-ignore-unexpected=note %s
+
+// If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
+// ASM-NOT: warning
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4
+#endif
+
+bfloat16_t test_svlastb_bf16(svbool_t pg, svbfloat16_t op) {
+  // CHECK-LABEL: test_svlastb_bf16
+  // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECK: %[[INTRINSIC:.*]] = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %[[PG]], <vscale x 8 x bfloat> %op)
+  // CHECK: ret bfloat %[[INTRINSIC]]
+  // expected-warning@+1 {{implicit declaration of function 'svlastb_bf16'}}
+  return SVE_ACLE_FUNC(svlastb, _bf16, , )(pg, op);
+}
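Note the scalar (bfloat16_t, not svbfloat16_t) return type in the two tests above: svlastb extracts the element at the last active lane of the predicate, and svlasta the element immediately after it, which is why the underlying IR intrinsics return bfloat. Minimal sketch (illustrative wrapper names):

    #include <arm_sve.h>

    bfloat16_t at_last_active(svbool_t pg, svbfloat16_t v)    { return svlastb_bf16(pg, v); }
    bfloat16_t after_last_active(svbool_t pg, svbfloat16_t v) { return svlasta_bf16(pg, v); }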
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -405,6 +405,10 @@
   defm CPY_ZPmR : sve_int_perm_cpy_r<"cpy", AArch64dup_pred>;
   defm CPY_ZPmV : sve_int_perm_cpy_v<"cpy", AArch64dup_pred>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+    def : SVE_3_Op_Pat<nxv8bf16, AArch64dup_pred, nxv8bf16, nxv8i1, bf16, CPY_ZPmV_H>;
+  }
+
   // Duplicate FP scalar into all vector elements
   def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
@@ -418,6 +422,10 @@
             (DUP_ZZI_S (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$src, ssub), 0)>;
   def : Pat<(nxv2f64 (AArch64dup (f64 FPR64:$src))),
             (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
+  let Predicates = [HasSVE, HasBF16] in {
+    def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+              (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+  }
 
   // Duplicate +0.0 into all vector elements
   def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
@@ -426,6 +434,9 @@
   def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
   def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
   def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
+  let Predicates = [HasSVE, HasBF16] in {
+    def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+  }
 
   // Duplicate Int immediate into all vector elements
   def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@@ -468,6 +479,10 @@
   defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
   defm EXT_ZZI : sve_int_perm_extract_i<"ext", AArch64ext>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+    def : SVE_2_Op_Pat<nxv8bf16, AArch64insr, nxv8bf16, bf16, INSR_ZV_H>;
+  }
+
   defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit", int_aarch64_sve_rbit>;
   defm REVB_ZPmZ : sve_int_perm_rev_revb<"revb", int_aarch64_sve_revb, bswap>;
   defm REVH_ZPmZ : sve_int_perm_rev_revh<"revh", int_aarch64_sve_revh>;
@@ -536,11 +551,23 @@
   defm CLASTA_ZPZ : sve_int_perm_clast_zz<0, "clasta", int_aarch64_sve_clasta>;
   defm CLASTB_ZPZ : sve_int_perm_clast_zz<1, "clastb", int_aarch64_sve_clastb>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+    def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clasta, nxv8i1, nxv8bf16, nxv8bf16, CLASTA_ZPZ_H>;
+    def : SVE_3_Op_Pat<bf16, int_aarch64_sve_clasta_n, nxv8i1, bf16, nxv8bf16, CLASTA_VPZ_H>;
+    def : SVE_3_Op_Pat<nxv8bf16, int_aarch64_sve_clastb, nxv8i1, nxv8bf16, nxv8bf16, CLASTB_ZPZ_H>;
+    def : SVE_3_Op_Pat<bf16, int_aarch64_sve_clastb_n, nxv8i1, bf16, nxv8bf16, CLASTB_VPZ_H>;
+  }
+
   defm LASTA_RPZ : sve_int_perm_last_r<0, "lasta", AArch64lasta>;
   defm LASTB_RPZ : sve_int_perm_last_r<1, "lastb", AArch64lastb>;
   defm LASTA_VPZ : sve_int_perm_last_v<0, "lasta", AArch64lasta>;
   defm LASTB_VPZ : sve_int_perm_last_v<1, "lastb", AArch64lastb>;
 
+  let Predicates = [HasSVE, HasBF16] in {
+    def : SVE_2_Op_Pat<bf16, AArch64lasta, nxv8i1, nxv8bf16, LASTA_VPZ_H>;
+    def : SVE_2_Op_Pat<bf16, AArch64lastb, nxv8i1, nxv8bf16, LASTB_VPZ_H>;
+  }
+
   // continuous load with reg+immediate
   defm LD1B_IMM : sve_mem_cld_si<0b0000, "ld1b", Z_b, ZPR8>;
   defm LD1B_H_IMM : sve_mem_cld_si<0b0001, "ld1b", Z_h, ZPR16>;
@@ -1482,6 +1509,13 @@
   def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+}
+
+let Predicates = [IsLE, HasBF16, HasSVE] in {
+  def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+  def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+  def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
 }
 
 let Predicates = [IsLE, HasSVE, HasBF16] in {
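The [IsLE, HasBF16, HasSVE] bitconvert patterns make bf16 reinterpretation free on little-endian targets (no instruction is selected for the cast itself). A hedged sketch of source that would rely on them, assuming the bf16 svreinterpret intrinsics, which this patch does not itself add:

    #include <arm_sve.h>

    // Expected to compile to no data-processing instruction: the
    // nxv8bf16 <-> nxv8i16 bitconvert is folded away by the patterns above.
    svuint16_t bits_of_bf16(svbfloat16_t v) {
      return svreinterpret_u16_bf16(v);
    }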
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -81,6 +81,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) #0 {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 8 x half> @dup_imm_f16(half %b) {
 ; CHECK-LABEL: dup_imm_f16:
 ; CHECK: mov z0.h, #16.00000000
@@ -126,5 +134,9 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -57,6 +57,16 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clasta_bf16:
+; CHECK: clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                                      <vscale x 8 x bfloat> %a,
+                                                                      <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_f32:
 ; CHECK: clasta z0.s, p0, z0.s, z1.s
@@ -131,6 +141,16 @@
   ret half %out
 }
 
+define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clasta_n_bf16:
+; CHECK: clasta h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                         bfloat %a,
+                                                         <vscale x 8 x bfloat> %b)
+  ret bfloat %out
+}
+
 define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_n_f32:
 ; CHECK: clasta s0, p0, s0, z1.s
@@ -205,6 +225,16 @@
   ret half %out
 }
 
+define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clastb_bf16:
+; CHECK: clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                                      <vscale x 8 x bfloat> %a,
+                                                                      <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_f32:
 ; CHECK: clastb z0.s, p0, z0.s, z1.s
@@ -279,6 +309,16 @@
   ret half %out
 }
 
+define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) #0 {
+; CHECK-LABEL: clastb_n_bf16:
+; CHECK: clastb h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                         bfloat %a,
+                                                         <vscale x 8 x bfloat> %b)
+  ret bfloat %out
+}
+
 define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_n_f32:
 ; CHECK: clastb s0, p0, s0, z1.s
@@ -343,6 +383,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: dupq_bf16:
+; CHECK: mov z0.q, q0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: dupq_f32:
 ; CHECK: mov z0.q, z0.q[1]
@@ -433,6 +481,20 @@
 }
 
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) #0 {
+; CHECK-LABEL: dupq_lane_bf16:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK:      tbl   z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
+  ret <vscale x 8 x bfloat> %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_f32:
 ; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
@@ -595,6 +657,15 @@
   ret half %res
 }
 
+define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: lasta_bf16
+; CHECK: lasta h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                      <vscale x 8 x bfloat> %a)
+  ret bfloat %res
+}
+
 define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: lasta_f32
 ; CHECK: lasta s0, p0, z0.s
@@ -671,6 +742,15 @@
   ret half %res
 }
 
+define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) #0 {
+; CHECK-LABEL: lastb_bf16
+; CHECK: lastb h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                      <vscale x 8 x bfloat> %a)
+  ret bfloat %res
+}
+
 define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: lastb_f32
 ; CHECK: lastb s0, p0, z0.s
@@ -1841,6 +1921,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
@@ -1849,6 +1930,7 @@
 declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 
@@ -1857,6 +1939,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
@@ -1865,6 +1948,7 @@
 declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 
@@ -1878,6 +1962,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
 
@@ -1894,6 +1979,7 @@
 declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
@@ -1903,6 +1989,7 @@
 declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
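The dupq_lane_bf16 checks above encode the TBL-based lowering of a variable-lane quadword splat: with two .d elements per 128-bit quadword, result element i reads source element (i & 1) + 2*idx. A worked instance of the check lines (illustrative arithmetic, not additional checks):

    // idx = 3:
    //   index + and     -> {0,1,0,1,...}   (the i & 1 term)
    //   add x1, x0, x0  -> 2*idx = 6
    //   mov + final add -> {6,7,6,7,...}
    // so TBL copies .d elements 6 and 7 (quadword 3) into every quadword.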
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
@@ -57,6 +57,16 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) #0 {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, p0/m, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
 ; CHECK-LABEL: dup_f32:
 ; CHECK: mov z0.s, p0/m, s1
@@ -77,10 +87,41 @@
   ret <vscale x 2 x double> %out
 }
 
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_z:
+; CHECK: mov z1.h, #0
+; CHECK: mov z1.h, p0/m, h0
+; CHECK: mov z0.d, z1.d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %pg, bfloat %op)
+  ret <vscale x 8 x bfloat> %out
+}
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_m:
+; CHECK: mov z0.h, p0/m, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %pg, bfloat %op)
+  ret <vscale x 8 x bfloat> %out
+}
+
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 8 x i1> %pg, bfloat %op) #0 {
+; CHECK-LABEL: test_svdup_n_bf16_x:
+; CHECK: mov z0.h, p0/m, h0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %pg, bfloat %op)
+  ret <vscale x 8 x bfloat> %out
+}
+
 declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -165,6 +165,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) #0 {
+; CHECK-LABEL: insr_bf16:
+; CHECK: insr z0.h, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: insr_f32:
 ; CHECK: insr z0.s, s1
@@ -348,6 +356,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)
 
@@ -368,3 +377,6 @@
 declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
 declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
 declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }
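insr shifts every element up one lane and inserts the scalar at lane 0, so repeated insertion fills the low lanes in reverse order. A hedged C sketch on top of the intrinsic exercised above (illustrative function name):

    #include <arm_sve.h>

    svbfloat16_t push_two(svbfloat16_t v, bfloat16_t a, bfloat16_t b) {
      v = svinsr_n_bf16(v, a);    // a lands in lane 0
      return svinsr_n_bf16(v, b); // now b is in lane 0, a in lane 1
    }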
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -172,6 +172,15 @@
 
 ;; Splats of legal floating point vector types
 
+define <vscale x 8 x bfloat> @splat_nxv8bf16(bfloat %val) #0 {
+; CHECK-LABEL: splat_nxv8bf16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %1 = insertelement <vscale x 8 x bfloat> undef, bfloat %val, i32 0
+  %2 = shufflevector <vscale x 8 x bfloat> %1, <vscale x 8 x bfloat> undef, <vscale x 8 x i32> zeroinitializer
+  ret <vscale x 8 x bfloat> %2
+}
+
 define <vscale x 8 x half> @splat_nxv8f16(half %val) {
 ; CHECK-LABEL: splat_nxv8f16:
 ; CHECK: mov z0.h, h0
@@ -233,6 +242,13 @@
   ret <vscale x 8 x half> zeroinitializer
 }
 
+define <vscale x 8 x bfloat> @splat_nxv8bf16_zero() #0 {
+; CHECK-LABEL: splat_nxv8bf16_zero:
+; CHECK: mov z0.h, #0
+; CHECK-NEXT: ret
+  ret <vscale x 8 x bfloat> zeroinitializer
+}
+
 define <vscale x 4 x half> @splat_nxv4f16_zero() {
 ; CHECK-LABEL: splat_nxv4f16_zero:
 ; CHECK: mov z0.h, #0
@@ -321,3 +337,6 @@
   %2 = shufflevector <vscale x 2 x float> %1, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
   ret <vscale x 2 x float> %2
 }
+
+; +bf16 is required for the bfloat version.
+attributes #0 = { "target-features"="+sve,+bf16" }