diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -678,13 +678,23 @@
 def SVDUPQ_8  : SInst<"svdupq[_n]_{d}", "dssssssssssssssss", "cUc", MergeNone>;
 def SVDUPQ_16 : SInst<"svdupq[_n]_{d}", "dssssssss", "sUsh", MergeNone>;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in {
+  def SVDUPQ_BF16 : SInst<"svdupq[_n]_{d}", "dssssssss", "b", MergeNone>;
+}
 def SVDUPQ_32 : SInst<"svdupq[_n]_{d}", "dssss", "iUif", MergeNone>;
 def SVDUPQ_64 : SInst<"svdupq[_n]_{d}", "dss", "lUld", MergeNone>;
-def SVDUP   : SInst<"svdup[_n]_{d}", "ds",   "csilUcUsUiUlhfd", MergeNone,    "aarch64_sve_dup_x">;
-def SVDUP_M : SInst<"svdup[_n]_{d}", "ddPs", "csilUcUsUiUlhfd", MergeOp1,     "aarch64_sve_dup">;
-def SVDUP_X : SInst<"svdup[_n]_{d}", "dPs",  "csilUcUsUiUlhfd", MergeAnyExp,  "aarch64_sve_dup">;
-def SVDUP_Z : SInst<"svdup[_n]_{d}", "dPs",  "csilUcUsUiUlhfd", MergeZeroExp, "aarch64_sve_dup">;
+multiclass svdup_base<string n, string p, MergeType mt, string i> {
+  def NAME : SInst<n, p, "csilUcUsUiUlhfd", mt, i>;
+  let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in {
+    def _BF16: SInst<n, p, "b", mt, i>;
+  }
+}
+
+defm SVDUP   : svdup_base<"svdup[_n]_{d}", "ds",   MergeNone,    "aarch64_sve_dup_x">;
+defm SVDUP_M : svdup_base<"svdup[_n]_{d}", "ddPs", MergeOp1,     "aarch64_sve_dup">;
+defm SVDUP_X : svdup_base<"svdup[_n]_{d}", "dPs",  MergeAnyExp,  "aarch64_sve_dup">;
+defm SVDUP_Z : svdup_base<"svdup[_n]_{d}", "dPs",  MergeZeroExp, "aarch64_sve_dup">;
 
 def SVINDEX : SInst<"svindex_{d}", "dss", "csilUcUsUiUl", MergeNone, "aarch64_sve_index">;
 
@@ -803,8 +813,11 @@
 def SVASRD_M : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeOp1,  "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
 def SVASRD_X : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeAny,  "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
 def SVASRD_Z : SInst<"svasrd[_n_{d}]", "dPdi", "csil", MergeZero, "aarch64_sve_asrd", [], [ImmCheck<2, ImmCheckShiftRight, 1>]>;
-def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+def SVINSR : SInst<"svinsr[_n_{d}]", "dds", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_insr">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in {
+  def SVINSR_BF16 : SInst<"svinsr[_n_{d}]", "dds", "b", MergeNone, "aarch64_sve_insr">;
+}
 
 ////////////////////////////////////////////////////////////////////////////////
 // Integer reductions
@@ -1133,6 +1146,9 @@
 // instruction's immediate.
 def SVDUP_LANE  : SInst<"svdup_lane[_{d}]",  "ddL", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_tbl">;
 def SVDUPQ_LANE : SInst<"svdupq_lane[_{d}]", "ddn", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_dupq_lane">;
+let ArchGuard = "defined(__ARM_FEATURE_SVE_BF16) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in {
+  def SVDUPQ_LANE_BF16 : SInst<"svdupq_lane[_{d}]", "ddn", "b", MergeNone, "aarch64_sve_dupq_lane">;
+}
 def SVEXT   : SInst<"svext[_{d}]",   "dddi", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ext", [], [ImmCheck<2, ImmCheckExtract, 1>]>;
 def SVLASTA : SInst<"svlasta[_{d}]", "sPd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lasta">;
 def SVLASTB : SInst<"svlastb[_{d}]", "sPd",  "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_lastb">;
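For reference, the guarded builtins above surface in C roughly as follows. This is a minimal sketch, not part of the patch: it assumes a compiler carrying these arm_sve.td changes and a target configuration where __ARM_FEATURE_SVE_BF16 and __ARM_FEATURE_BF16_SCALAR_ARITHMETIC are defined (e.g. -march=armv8.6-a+sve+bf16); the function name is illustrative.

#include <arm_sve.h>

// Splat one bf16 scalar across every element of a scalable vector.
// This maps to the unpredicated SVDUP builtin (aarch64_sve_dup_x).
svbfloat16_t broadcast_bf16(bfloat16_t x) {
  return svdup_n_bf16(x);
}
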
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -7720,6 +7720,9 @@
     return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 4);
   case SVETypeFlags::EltTyBool64:
     return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
+
+  case SVETypeFlags::EltTyBFloat16:
+    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
   }
 }
 
@@ -8363,6 +8366,7 @@
   case SVE::BI__builtin_sve_svdupq_n_s64:
   case SVE::BI__builtin_sve_svdupq_n_u16:
   case SVE::BI__builtin_sve_svdupq_n_f16:
+  case SVE::BI__builtin_sve_svdupq_n_bf16:
   case SVE::BI__builtin_sve_svdupq_n_s16:
   case SVE::BI__builtin_sve_svdupq_n_u32:
   case SVE::BI__builtin_sve_svdupq_n_f32:
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dup.c
@@ -1,6 +1,9 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
@@ -82,6 +85,16 @@
   return SVE_ACLE_FUNC(svdup,_n,_f16,)(op);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdup_n_bf16(bfloat16_t op)
+{
+  // CHECKBF16-LABEL: test_svdup_n_bf16
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %op)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdup,_n,_bf16,)(op);
+}
+#endif
+
 svfloat32_t test_svdup_n_f32(float32_t op)
 {
   // CHECK-LABEL: test_svdup_n_f32
@@ -177,6 +190,17 @@
   return SVE_ACLE_FUNC(svdup,_n,_f16_z,)(pg, op);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdup_n_bf16_z(svbool_t pg, bfloat16_t op)
+{
+  // CHECKBF16-LABEL: test_svdup_n_bf16_z
+  // CHECKBF16: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdup,_n,_bf16_z,)(pg, op);
+}
+#endif
+
 svfloat32_t test_svdup_n_f32_z(svbool_t pg, float32_t op)
 {
   // CHECK-LABEL: test_svdup_n_f32_z
@@ -274,6 +298,17 @@
   return SVE_ACLE_FUNC(svdup,_n,_f16_m,)(inactive, pg, op);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdup_n_bf16_m(svbfloat16_t inactive, svbool_t pg, bfloat16_t op)
+{
+  // CHECKBF16-LABEL: test_svdup_n_bf16_m
+  // CHECKBF16: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdup,_n,_bf16_m,)(inactive, pg, op);
+}
+#endif
+
 svfloat32_t test_svdup_n_f32_m(svfloat32_t inactive, svbool_t pg, float32_t op)
 {
   // CHECK-LABEL: test_svdup_n_f32_m
@@ -371,6 +406,17 @@
   return SVE_ACLE_FUNC(svdup,_n,_f16_x,)(pg, op);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdup_n_bf16_x(svbool_t pg, bfloat16_t op)
+{
+  // CHECKBF16-LABEL: test_svdup_n_bf16_x
+  // CHECKBF16: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %[[PG]], bfloat %op)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdup,_n,_bf16_x,)(pg, op);
+}
+#endif
+
 svfloat32_t test_svdup_n_f32_x(svbool_t pg, float32_t op)
 {
   // CHECK-LABEL: test_svdup_n_f32_x
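The three predicated forms exercised above differ only in how inactive lanes are filled. A minimal usage sketch (illustrative names, same feature assumptions as the RUN lines above):

#include <arm_sve.h>

svbfloat16_t dup_forms(svbfloat16_t inactive, svbool_t pg, bfloat16_t x) {
  svbfloat16_t z = svdup_n_bf16_z(pg, x);           // inactive lanes zeroed
  svbfloat16_t m = svdup_n_bf16_m(inactive, pg, x); // inactive lanes taken from `inactive`
  svbfloat16_t u = svdup_n_bf16_x(pg, x);           // inactive lanes unspecified (cheapest)
  (void)z; (void)u;
  return m;
}
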
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_dupq.c
@@ -1,6 +1,10 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+
+
 #include <arm_sve.h>
 
 #ifdef SVE_OVERLOADED_FORMS
@@ -82,6 +86,16 @@
   return SVE_ACLE_FUNC(svdupq_lane,_f16,,)(data, index);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdupq_lane_bf16(svbfloat16_t data, uint64_t index)
+{
+  // CHECKBF16-LABEL: test_svdupq_lane_bf16
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %data, i64 %index)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svdupq_lane,_bf16,,)(data, index);
+}
+#endif
+
 svfloat32_t test_svdupq_lane_f32(svfloat32_t data, uint64_t index)
 {
   // CHECK-LABEL: test_svdupq_lane_f32
@@ -249,6 +263,25 @@
   return SVE_ACLE_FUNC(svdupq,_n,_f16,)(x0, x1, x2, x3, x4, x5, x6, x7);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svdupq_n_bf16(bfloat16_t x0, bfloat16_t x1, bfloat16_t x2, bfloat16_t x3,
+                                bfloat16_t x4, bfloat16_t x5, bfloat16_t x6, bfloat16_t x7)
+{
+  // CHECKBF16-LABEL: test_svdupq_n_bf16
+  // CHECKBF16: %[[ALLOCA:.*]] = alloca [8 x bfloat], align 16
+  // CHECKBF16-DAG: %[[BASE:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 0
+  // CHECKBF16-DAG: store bfloat %x0, bfloat* %[[BASE]], align 16
+  // <assume stores of x1..x6 in between>
+  // CHECKBF16-DAG: %[[GEP:.*]] = getelementptr inbounds [8 x bfloat], [8 x bfloat]* %[[ALLOCA]], i64 0, i64 7
+  // CHECKBF16: store bfloat %x7, bfloat* %[[GEP]], align 2
+  // CHECKBF16-NOT: store
+  // CHECKBF16: call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+  // CHECKBF16: %[[LOAD:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1rq.nxv8bf16(<vscale x 8 x i1> %{{.*}}, bfloat* nonnull %[[BASE]])
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[LOAD]]
+  return SVE_ACLE_FUNC(svdupq,_n,_bf16,)(x0, x1, x2, x3, x4, x5, x6, x7);
+}
+#endif
+
 svfloat32_t test_svdupq_n_f32(float32_t x0, float32_t x1, float32_t x2, float32_t x3)
 {
   // CHECK-LABEL: test_svdupq_n_f32
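svdupq_n builds one 128-bit quadword from eight bf16 scalars and replicates it across the whole vector, which is why the codegen checked above stores to a 16-byte alloca and reloads with ld1rq. A short, illustrative sketch under the same feature assumptions:

#include <arm_sve.h>

// Every 128-bit quadword of the result holds the pattern {a,b,a,b,a,b,a,b}.
svbfloat16_t repeat_pair(bfloat16_t a, bfloat16_t b) {
  return svdupq_n_bf16(a, b, a, b, a, b, a, b);
}
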
diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
--- a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
+++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_insr.c
@@ -4,6 +4,9 @@
 // RUN: %clang_cc1 -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -o - %s >/dev/null 2>%t
 // RUN: FileCheck --check-prefix=ASM --allow-empty %s <%t
 
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+// RUN: %clang_cc1 -DENABLE_BF16_TESTS -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE_BF16 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECKBF16
+
 // If this check fails please read test/CodeGen/aarch64-sve-intrinsics/README for instructions on how to resolve it.
 // ASM-NOT: warning
 #include <arm_sve.h>
@@ -87,6 +90,16 @@
   return SVE_ACLE_FUNC(svinsr,_n_f16,,)(op1, op2);
 }
 
+#ifdef ENABLE_BF16_TESTS
+svbfloat16_t test_svinsr_n_bf16(svbfloat16_t op1, bfloat16_t op2)
+{
+  // CHECKBF16-LABEL: test_svinsr_n_bf16
+  // CHECKBF16: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %op1, bfloat %op2)
+  // CHECKBF16: ret <vscale x 8 x bfloat> %[[INTRINSIC]]
+  return SVE_ACLE_FUNC(svinsr,_n_bf16,,)(op1, op2);
+}
+#endif
+
 svfloat32_t test_svinsr_n_f32(svfloat32_t op1, float32_t op2)
 {
   // CHECK-LABEL: test_svinsr_n_f32
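svinsr shifts the whole vector up by one element and inserts the scalar at lane 0, so it can feed a scalar stream into a sliding vector window. An illustrative sketch, not part of the patch:

#include <arm_sve.h>

// After the call, lane 0 holds `x` and lane i holds the old lane i-1;
// the previous top element falls off the end.
svbfloat16_t push_front(svbfloat16_t window, bfloat16_t x) {
  return svinsr_n_bf16(window, x);
}
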
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -403,6 +403,8 @@
   // Duplicate FP scalar into all vector elements
   def : Pat<(nxv8f16 (AArch64dup (f16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
+  def : Pat<(nxv8bf16 (AArch64dup (bf16 FPR16:$src))),
+            (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
   def : Pat<(nxv4f16 (AArch64dup (f16 FPR16:$src))),
             (DUP_ZZI_H (INSERT_SUBREG (IMPLICIT_DEF), FPR16:$src, hsub), 0)>;
   def : Pat<(nxv2f16 (AArch64dup (f16 FPR16:$src))),
@@ -415,12 +417,13 @@
             (DUP_ZZI_D (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$src, dsub), 0)>;
 
   // Duplicate +0.0 into all vector elements
-  def : Pat<(nxv8f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
-  def : Pat<(nxv4f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
-  def : Pat<(nxv2f16 (AArch64dup (f16 fpimm0))), (DUP_ZI_H 0, 0)>;
-  def : Pat<(nxv4f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
-  def : Pat<(nxv2f32 (AArch64dup (f32 fpimm0))), (DUP_ZI_S 0, 0)>;
-  def : Pat<(nxv2f64 (AArch64dup (f64 fpimm0))), (DUP_ZI_D 0, 0)>;
+  def : Pat<(nxv8f16  (AArch64dup (f16  fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv8bf16 (AArch64dup (bf16 fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv4f16  (AArch64dup (f16  fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv2f16  (AArch64dup (f16  fpimm0))), (DUP_ZI_H 0, 0)>;
+  def : Pat<(nxv4f32  (AArch64dup (f32  fpimm0))), (DUP_ZI_S 0, 0)>;
+  def : Pat<(nxv2f32  (AArch64dup (f32  fpimm0))), (DUP_ZI_S 0, 0)>;
+  def : Pat<(nxv2f64  (AArch64dup (f64  fpimm0))), (DUP_ZI_D 0, 0)>;
 
   // Duplicate Int immediate into all vector elements
   def : Pat<(nxv16i8 (AArch64dup (i32 (SVE8BitLslImm i32:$a, i32:$b)))),
@@ -1430,12 +1433,12 @@
   def : Pat<(nxv2i64 (bitconvert (nxv8i16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
   def : Pat<(nxv2i64 (bitconvert (nxv4i32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
   def : Pat<(nxv2i64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
+  def : Pat<(nxv2i64 (bitconvert (nxv8bf16 ZPR:$src))), (nxv2i64 ZPR:$src)>;
   def : Pat<(nxv2i64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2i64 ZPR:$src)>;
   def : Pat<(nxv2i64 (bitconvert (nxv2f64 ZPR:$src))), (nxv2i64 ZPR:$src)>;
 
   def : Pat<(nxv8f16 (bitconvert (nxv16i8 ZPR:$src))), (nxv8f16 ZPR:$src)>;
   def : Pat<(nxv8f16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8f16 ZPR:$src)>;
-  def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
   def : Pat<(nxv8f16 (bitconvert (nxv4i32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
   def : Pat<(nxv8f16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8f16 ZPR:$src)>;
   def : Pat<(nxv8f16 (bitconvert (nxv4f32 ZPR:$src))), (nxv8f16 ZPR:$src)>;
@@ -1454,6 +1457,9 @@
   def : Pat<(nxv2f64 (bitconvert (nxv2i64 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   def : Pat<(nxv2f64 (bitconvert (nxv8f16 ZPR:$src))), (nxv2f64 ZPR:$src)>;
   def : Pat<(nxv2f64 (bitconvert (nxv4f32 ZPR:$src))), (nxv2f64 ZPR:$src)>;
+
+  def : Pat<(nxv8bf16 (bitconvert (nxv2i64 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
+  def : Pat<(nxv8bf16 (bitconvert (nxv8i16 ZPR:$src))), (nxv8bf16 ZPR:$src)>;
 }
 
 def : Pat<(nxv16i1 (reinterpret_cast (nxv16i1 PPR:$src))), (COPY_TO_REGCLASS PPR:$src, PPR)>;
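The nxv8bf16 bitconvert patterns above make bf16 vectors freely re-interpretable with the other SVE element types at zero cost. At the C level this corresponds to the svreinterpret family; the sketch below assumes the bf16 svreinterpret overloads are available in the compiler (they are defined separately from this patch):

#include <arm_sve.h>

// Reinterpret raw 16-bit lanes as bf16; no instruction is generated,
// matching the no-op bitconvert patterns above.
svbfloat16_t bits_as_bf16(svuint16_t bits) {
  return svreinterpret_bf16_u16(bits);
}
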
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -1237,6 +1237,7 @@
   def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
 
   def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<nxv8bf16, op, nxv8bf16, bf16, !cast<Instruction>(NAME # _H)>;
   def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
 }
@@ -5669,9 +5670,10 @@
   def _S : sve_int_perm_clast_vz<0b10, ab, asm, ZPR32, FPR32>;
   def _D : sve_int_perm_clast_vz<0b11, ab, asm, ZPR64, FPR64>;
 
-  def : SVE_3_Op_Pat<f16, op, nxv8i1, f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_3_Op_Pat<f32, op, nxv4i1, f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_3_Op_Pat<f64, op, nxv2i1, f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_3_Op_Pat<f16,  op, nxv8i1, f16,  nxv8f16,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<bf16, op, nxv8i1, bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<f32,  op, nxv4i1, f32,  nxv4f32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<f64,  op, nxv2i1, f64,  nxv2f64,  !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_clast_zz<bits<2> sz8_64, bit ab, string asm,
@@ -5708,9 +5710,10 @@
   def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 
-  def : SVE_3_Op_Pat<nxv8f16, op, nxv8i1, nxv8f16, nxv8f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4i1, nxv4f32, nxv4f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_3_Op_Pat<nxv2f64, op, nxv2i1, nxv2f64, nxv2f64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_3_Op_Pat<nxv8f16,  op, nxv8i1, nxv8f16,  nxv8f16,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8i1, nxv8bf16, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4f32,  op, nxv4i1, nxv4f32,  nxv4f32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2f64,  op, nxv2i1, nxv2f64,  nxv2f64,  !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_last_r<bits<2> sz8_64, bit ab, string asm,
@@ -5769,10 +5772,11 @@
   def _S : sve_int_perm_last_v<0b10, ab, asm, ZPR32, FPR32>;
   def _D : sve_int_perm_last_v<0b11, ab, asm, ZPR64, FPR64>;
 
-  def : SVE_2_Op_Pat<f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_2_Op_Pat<f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_2_Op_Pat<f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_2_Op_Pat<f16,  op, nxv8i1, nxv8f16,  !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<bf16, op, nxv8i1, nxv8bf16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_2_Op_Pat<f32,  op, nxv4i1, nxv4f32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<f32,  op, nxv2i1, nxv2f32,  !cast<Instruction>(NAME # _S)>;
+  def : SVE_2_Op_Pat<f64,  op, nxv2i1, nxv2f64,  !cast<Instruction>(NAME # _D)>;
 }
 
 class sve_int_perm_splice<bits<2> sz8_64, string asm, ZPRRegOp zprty>
@@ -5979,6 +5983,7 @@
                   (!cast<Instruction>(NAME # _D) ZPR64:$Zd, PPR3bAny:$Pg, FPR64:$Vn), 1>;
 
   def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv8i1, bf16, !cast<Instruction>(NAME # _H)>;
   def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2f32, op, nxv2f32, nxv2i1, f32, !cast<Instruction>(NAME # _S)>;
   def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, f64, !cast<Instruction>(NAME # _D)>;
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-bfloat.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-bfloat.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-bfloat.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_z(<vscale x 16 x i1> %pg, bfloat %op) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svdup_n_bf16_z:
+entry:
+  %0 = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %1 = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> zeroinitializer, <vscale x 8 x i1> %0, bfloat %op)
+  ret <vscale x 8 x bfloat> %1
+}
+
+; Function Attrs: nounwind readnone
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat) #1
+
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_m(<vscale x 8 x bfloat> %inactive, <vscale x 16 x i1> %pg, bfloat %op) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svdup_n_bf16_m:
+entry:
+  %0 = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %1 = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %inactive, <vscale x 8 x i1> %0, bfloat %op)
+  ret <vscale x 8 x bfloat> %1
+}
+
+
+define <vscale x 8 x bfloat> @test_svdup_n_bf16_x(<vscale x 16 x i1> %pg, bfloat %op) local_unnamed_addr #0 {
+; CHECK-LABEL: test_svdup_n_bf16_x:
+entry:
+  %0 = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg)
+  %1 = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> undef, <vscale x 8 x i1> %0, bfloat %op)
+  ret <vscale x 8 x bfloat> %1
+}
+
+declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
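The clast patterns added above back the svclasta/svclastb intrinsics at the LLVM level. Assuming the matching ACLE bf16 builtins are enabled (that clang-side change is separate from this patch), usage would look roughly like:

#include <arm_sve.h>

// Returns the element of `data` one past the last active lane of `pg`,
// or `fallback` when no lane is active (CLASTA, scalar form).
bfloat16_t after_last_active(svbool_t pg, bfloat16_t fallback, svbfloat16_t data) {
  return svclasta_n_bf16(pg, fallback, data);
}
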
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-dup-x.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -asm-verbose=0 < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs -mattr=+sve -mattr=+bf16 -asm-verbose=0 < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
@@ -81,6 +81,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dup_bf16(bfloat %b) {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, h0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 8 x half> @dup_imm_f16(half %b) {
 ; CHECK-LABEL: dup_imm_f16:
 ; CHECK: mov z0.h, #16.00000000
@@ -126,5 +134,6 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.x.nxv4i32(i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.x.nxv2i64(i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dup.x.nxv8f16(half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.x.nxv8bf16(bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.x.nxv4f32(float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.x.nxv2f64(double)
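The lasta/lastb tests in the next file extract a scalar from a predicated vector; lastb yields the last active element, which pairs naturally with whilelt loop predicates. A sketch, again assuming the bf16 ACLE builtins are available:

#include <arm_sve.h>

// Extract the last element of `v` that is active under `pg` (LASTB).
bfloat16_t last_active(svbool_t pg, svbfloat16_t v) {
  return svlastb_bf16(pg, v);
}
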
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
@@ -57,6 +57,16 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @clasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: clasta_bf16:
+; CHECK: clasta z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                                      <vscale x 8 x bfloat> %a,
+                                                                      <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @clasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_f32:
 ; CHECK: clasta z0.s, p0, z0.s, z1.s
@@ -131,6 +141,16 @@
   ret half %out
 }
 
+define bfloat @clasta_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: clasta_n_bf16:
+; CHECK: clasta h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %out = call bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                         bfloat %a,
+                                                         <vscale x 8 x bfloat> %b)
+  ret bfloat %out
+}
+
 define float @clasta_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clasta_n_f32:
 ; CHECK: clasta s0, p0, s0, z1.s
@@ -205,6 +225,16 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @clastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: clastb_bf16:
+; CHECK: clastb z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                                      <vscale x 8 x bfloat> %a,
+                                                                      <vscale x 8 x bfloat> %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @clastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_f32:
 ; CHECK: clastb z0.s, p0, z0.s, z1.s
@@ -279,6 +309,16 @@
   ret half %out
 }
 
+define bfloat @clastb_n_bf16(<vscale x 8 x i1> %pg, bfloat %a, <vscale x 8 x bfloat> %b) {
+; CHECK-LABEL: clastb_n_bf16:
+; CHECK: clastb h0, p0, h0, z1.h
+; CHECK-NEXT: ret
+  %out = call bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                         bfloat %a,
+                                                         <vscale x 8 x bfloat> %b)
+  ret bfloat %out
+}
+
 define float @clastb_n_f32(<vscale x 4 x i1> %pg, float %a, <vscale x 4 x float> %b) {
 ; CHECK-LABEL: clastb_n_f32:
 ; CHECK: clastb s0, p0, s0, z1.s
@@ -343,6 +383,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dupq_bf16(<vscale x 8 x bfloat> %a) {
+; CHECK-LABEL: dupq_bf16:
+; CHECK: mov z0.q, q0
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 0)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @dupq_f32(<vscale x 4 x float> %a) {
 ; CHECK-LABEL: dupq_f32:
 ; CHECK: mov z0.q, z0.q[1]
@@ -433,6 +481,20 @@
 }
 
 ; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
+define <vscale x 8 x bfloat> @dupq_lane_bf16(<vscale x 8 x bfloat> %a, i64 %idx) {
+; CHECK-LABEL: dupq_lane_bf16:
+; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
+; CHECK-DAG:  and   [[Z2:z[0-9]+]].d, [[Z1]].d, #0x1
+; CHECK-DAG:  add   [[X1:x[0-9]+]], x0, x0
+; CHECK-DAG:  mov   [[Z3:z[0-9]+]].d, [[X1]]
+; CHECK:      add   [[Z4:z[0-9]+]].d, [[Z2]].d, [[Z3]].d
+; CHECK:      tbl   z0.d, { z0.d }, [[Z4]].d
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %a, i64 %idx)
+  ret <vscale x 8 x bfloat> %out
+}
+
+; NOTE: Identical operation to dupq_lane_i8 (i.e. element type is irrelevant).
 define <vscale x 4 x float> @dupq_lane_f32(<vscale x 4 x float> %a, i64 %idx) {
 ; CHECK-LABEL: dupq_lane_f32:
 ; CHECK-DAG:  index [[Z1:z[0-9]+]].d, #0, #1
@@ -595,6 +657,15 @@
   ret half %res
 }
 
+define bfloat @lasta_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) {
+; CHECK-LABEL: lasta_bf16
+; CHECK: lasta h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                      <vscale x 8 x bfloat> %a)
+  ret bfloat %res
+}
+
 define float @lasta_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: lasta_f32
 ; CHECK: lasta s0, p0, z0.s
@@ -671,6 +742,15 @@
   ret half %res
 }
 
+define bfloat @lastb_bf16(<vscale x 8 x i1> %pg, <vscale x 8 x bfloat> %a) {
+; CHECK-LABEL: lastb_bf16
+; CHECK: lastb h0, p0, z0.h
+; CHECK-NEXT: ret
+  %res = call bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1> %pg,
+                                                      <vscale x 8 x bfloat> %a)
+  ret bfloat %res
+}
+
 define float @lastb_f32(<vscale x 4 x i1> %pg, <vscale x 4 x float> %a) {
 ; CHECK-LABEL: lastb_f32
 ; CHECK: lastb s0, p0, z0.s
@@ -1760,6 +1840,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.clasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.clasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x half> @llvm.aarch64.sve.clasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.clasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.clasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
@@ -1768,6 +1849,7 @@
 declare i32 @llvm.aarch64.sve.clasta.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.clasta.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.clasta.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clasta.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.clasta.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.clasta.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 
@@ -1776,6 +1858,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.clastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.clastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
 declare <vscale x 8 x half> @llvm.aarch64.sve.clastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.clastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
 declare <vscale x 4 x float> @llvm.aarch64.sve.clastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.clastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
 
@@ -1784,6 +1867,7 @@
 declare i32 @llvm.aarch64.sve.clastb.n.nxv4i32(<vscale x 4 x i1>, i32, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.clastb.n.nxv2i64(<vscale x 2 x i1>, i64, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.clastb.n.nxv8f16(<vscale x 8 x i1>, half, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.clastb.n.nxv8bf16(<vscale x 8 x i1>, bfloat, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.clastb.n.nxv4f32(<vscale x 4 x i1>, float, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.clastb.n.nxv2f64(<vscale x 2 x i1>, double, <vscale x 2 x double>)
 
@@ -1797,6 +1881,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)
 
@@ -1813,6 +1898,7 @@
 declare i32 @llvm.aarch64.sve.lasta.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.lasta.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.lasta.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lasta.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.lasta.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 declare float @llvm.aarch64.sve.lasta.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.lasta.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
 
@@ -1822,6 +1908,7 @@
 declare i32 @llvm.aarch64.sve.lastb.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>)
 declare i64 @llvm.aarch64.sve.lastb.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>)
 declare half @llvm.aarch64.sve.lastb.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>)
+declare bfloat @llvm.aarch64.sve.lastb.nxv8bf16(<vscale x 8 x i1>, <vscale x 8 x bfloat>)
 declare float @llvm.aarch64.sve.lastb.nxv2f32(<vscale x 2 x i1>, <vscale x 2 x float>)
 declare float @llvm.aarch64.sve.lastb.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>)
 declare double @llvm.aarch64.sve.lastb.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>)
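As the dupq_lane tests above show, a constant quadword index folds to a single mov while a variable index goes through the generic index/and/add/tbl sequence. A C-level sketch using the svdupq_lane builtin added in arm_sve.td earlier in this patch (function name illustrative):

#include <stdint.h>
#include <arm_sve.h>

// Broadcast the idx'th 128-bit quadword of `v` to every quadword.
svbfloat16_t splat_quadword(svbfloat16_t v, uint64_t idx) {
  return svdupq_lane_bf16(v, idx);
}
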
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-scalar-to-vec.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
@@ -57,6 +57,16 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @dup_bf16(<vscale x 8 x bfloat> %a, <vscale x 8 x i1> %pg, bfloat %b) {
+; CHECK-LABEL: dup_bf16:
+; CHECK: mov z0.h, p0/m, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat> %a,
+                                                                   <vscale x 8 x i1> %pg,
+                                                                   bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @dup_f32(<vscale x 4 x float> %a, <vscale x 4 x i1> %pg, float %b) {
 ; CHECK-LABEL: dup_f32:
 ; CHECK: mov z0.s, p0/m, s1
@@ -82,5 +92,6 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dup.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
--- a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s 2>%t | FileCheck %s
 ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t
 
 ; WARN-NOT: warning
@@ -165,6 +165,14 @@
   ret <vscale x 8 x half> %out
 }
 
+define <vscale x 8 x bfloat> @insr_bf16(<vscale x 8 x bfloat> %a, bfloat %b) {
+; CHECK-LABEL: insr_bf16:
+; CHECK: insr z0.h, h1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat> %a, bfloat %b)
+  ret <vscale x 8 x bfloat> %out
+}
+
 define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
 ; CHECK-LABEL: insr_f32:
 ; CHECK: insr z0.s, s1
@@ -348,6 +356,7 @@
 declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
 declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
 declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
+declare <vscale x 8 x bfloat> @llvm.aarch64.sve.insr.nxv8bf16(<vscale x 8 x bfloat>, bfloat)
 declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
 declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)