Add bfloat16 forms of the SVE2 svwhilerw/svwhilewr ACLE intrinsics.

The new builtins are guarded by __ARM_FEATURE_SVE2 and __ARM_FEATURE_SVE_BF16,
map onto the existing aarch64_sve_whilerw_h / aarch64_sve_whilewr_h intrinsics,
and use an nxv8i1 predicate for bfloat16 elements. Clang and LLVM codegen tests
are added for both intrinsics.

Index: clang/include/clang/Basic/arm_sve.td
===================================================================
--- clang/include/clang/Basic/arm_sve.td
+++ clang/include/clang/Basic/arm_sve.td
@@ -1926,6 +1926,11 @@
 def SVWHILEWR_D : SInst<"svwhilewr[_{1}]", "Pcc", "lUld", MergeNone, "aarch64_sve_whilewr_d", [IsOverloadWhileRW]>;
 }
 
+let ArchGuard = "defined(__ARM_FEATURE_SVE2) && defined(__ARM_FEATURE_SVE_BF16)" in {
+def SVWHILERW_H_BF16 : SInst<"svwhilerw[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilerw_h", [IsOverloadWhileRW]>;
+def SVWHILEWR_H_BF16 : SInst<"svwhilewr[_{1}]", "Pcc", "b", MergeNone, "aarch64_sve_whilewr_h", [IsOverloadWhileRW]>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // SVE2 - Extended table lookup/permute
 let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
Index: clang/lib/CodeGen/CGBuiltin.cpp
===================================================================
--- clang/lib/CodeGen/CGBuiltin.cpp
+++ clang/lib/CodeGen/CGBuiltin.cpp
@@ -7705,6 +7705,8 @@
   case SVETypeFlags::EltTyInt64:
     return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 2);
 
+  case SVETypeFlags::EltTyBFloat16:
+    return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
   case SVETypeFlags::EltTyFloat16:
     return llvm::ScalableVectorType::get(Builder.getInt1Ty(), 8);
   case SVETypeFlags::EltTyFloat32:
Index: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilerw-bfloat.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// Test expected warnings for implicit declaration when +sve2 is missing
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+// Test expected warnings for implicit declaration when +bf16 is missing
+// NOTE: +bf16 doesn't currently imply __ARM_FEATURE_SVE_BF16, once the
+// implementation is complete it will, at which point -target-feature +bf16
+// should be removed.
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+
+// Test expected ambiguous call error for overloaded form when +bf16 is missing
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload-bf16 -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svwhilerw_bf16(const bfloat16_t *op1, const bfloat16_t *op2)
+{
+  // CHECK-LABEL: test_svwhilerw_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nxv8i1.p0bf16(bfloat* %op1, bfloat* %op2)
+  // CHECK: %[[INTRINSIC_REINT:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC_REINT]]
+  // overload-warning@+3 {{implicit declaration of function 'svwhilerw'}}
+  // expected-warning@+2 {{implicit declaration of function 'svwhilerw_bf16'}}
+  // overload-bf16-error@+1 {{call to 'svwhilerw' is ambiguous}}
+  return SVE_ACLE_FUNC(svwhilerw,_bf16,,)(op1, op2);
+}
Index: clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
===================================================================
--- /dev/null
+++ clang/test/CodeGen/aarch64-sve2-intrinsics/acle_sve2_whilewr-bfloat.c
@@ -0,0 +1,36 @@
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
+
+// Test expected warnings for implicit declaration when +sve2 is missing
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE_BF16 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload -verify-ignore-unexpected=error %s
+
+// Test expected warnings for implicit declaration when +bf16 is missing
+// NOTE: +bf16 doesn't currently imply __ARM_FEATURE_SVE_BF16, once the
+// implementation is complete it will, at which point -target-feature +bf16
+// should be removed.
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify -verify-ignore-unexpected=error %s
+
+// Test expected ambiguous call error for overloaded form when +bf16 is missing
+// RUN: %clang_cc1 -D__ARM_FEATURE_SVE -D__ARM_FEATURE_SVE2 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +bf16 -fallow-half-arguments-and-returns -fsyntax-only -verify=overload-bf16 -verify-ignore-unexpected=note %s
+
+#include <arm_sve.h>
+
+#ifdef SVE_OVERLOADED_FORMS
+// A simple used,unused... macro, long enough to represent any SVE builtin.
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3,A4_UNUSED) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3,A4) A1##A2##A3##A4
+#endif
+
+svbool_t test_svwhilewr_bf16(const bfloat16_t *op1, const bfloat16_t *op2)
+{
+  // CHECK-LABEL: test_svwhilewr_bf16
+  // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nxv8i1.p0bf16(bfloat* %op1, bfloat* %op2)
+  // CHECK: %[[INTRINSIC_REINT:.*]] = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %[[INTRINSIC]])
+  // CHECK: ret <vscale x 16 x i1> %[[INTRINSIC_REINT]]
+  // overload-warning@+3 {{implicit declaration of function 'svwhilewr'}}
+  // expected-warning@+2 {{implicit declaration of function 'svwhilewr_bf16'}}
+  // overload-bf16-error@+1 {{call to 'svwhilewr' is ambiguous}}
+  return SVE_ACLE_FUNC(svwhilewr,_bf16,,)(op1, op2);
+}
Index: llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
===================================================================
--- llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
+++ llvm/test/CodeGen/AArch64/sve2-intrinsics-contiguous-conflict-detection.ll
@@ -36,6 +36,14 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 8 x i1> @whilerw_bfloat(bfloat* %a, bfloat* %b) {
+; CHECK-LABEL: whilerw_bfloat:
+; CHECK: whilerw p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 8 x i1> @whilerw_half(half* %a, half* %b) {
 ; CHECK-LABEL: whilerw_half:
 ; CHECK: whilerw p0.h, x0, x1
@@ -96,6 +104,14 @@
   ret <vscale x 2 x i1> %out
 }
 
+define <vscale x 8 x i1> @whilewr_bfloat(bfloat* %a, bfloat* %b) {
+; CHECK-LABEL: whilewr_bfloat:
+; CHECK: whilewr p0.h, x0, x1
+; CHECK-NEXT: ret
+  %out = call <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
+  ret <vscale x 8 x i1> %out
+}
+
 define <vscale x 8 x i1> @whilewr_half(half* %a, half* %b) {
 ; CHECK-LABEL: whilewr_half:
 ; CHECK: whilewr p0.h, x0, x1
@@ -125,6 +141,7 @@
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1(i32* %a, i32* %b)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1(i64* %a, i64* %b)
 
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilerw.h.nx8i1.f16.f16(half* %a, half* %b)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilerw.s.nx4i1.f32.f32(float* %a, float* %b)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.whilerw.d.nx2i1.f64.f64(double* %a, double* %b)
@@ -134,6 +151,7 @@
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1(i32* %a, i32* %b)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1(i64* %a, i64* %b)
 
+declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.bf16.bf16(bfloat* %a, bfloat* %b)
 declare <vscale x 8 x i1> @llvm.aarch64.sve.whilewr.h.nx8i1.f16.f16(half* %a, half* %b)
 declare <vscale x 4 x i1> @llvm.aarch64.sve.whilewr.s.nx4i1.f32.f32(float* %a, float* %b)
 declare <vscale x 2 x i1> @llvm.aarch64.sve.whilewr.d.nx2i1.f64.f64(double* %a, double* %b)