diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -35,10 +35,11 @@ // // - IsFP is true for vectors of floating-point elements. // +// - IsBF is true for vectors of brain float elements. //===----------------------------------------------------------------------===// #ifndef SVE_VECTOR_TYPE -#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP) \ +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP, IsBF) \ SVE_TYPE(Name, Id, SingletonId) #endif @@ -49,19 +50,20 @@ //===- Vector point types -----------------------------------------------===// -SVE_VECTOR_TYPE("__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, true, false) -SVE_VECTOR_TYPE("__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false) -SVE_VECTOR_TYPE("__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false) -SVE_VECTOR_TYPE("__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false) +SVE_VECTOR_TYPE("__SVInt8_t", SveInt8, SveInt8Ty, 16, 8, true, false, false) +SVE_VECTOR_TYPE("__SVInt16_t", SveInt16, SveInt16Ty, 8, 16, true, false, false) +SVE_VECTOR_TYPE("__SVInt32_t", SveInt32, SveInt32Ty, 4, 32, true, false, false) +SVE_VECTOR_TYPE("__SVInt64_t", SveInt64, SveInt64Ty, 2, 64, true, false, false) -SVE_VECTOR_TYPE("__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false) -SVE_VECTOR_TYPE("__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false) -SVE_VECTOR_TYPE("__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false) -SVE_VECTOR_TYPE("__SVUint64_t", SveUint64, SveUint64Ty, 2, 64, false, false) +SVE_VECTOR_TYPE("__SVUint8_t", SveUint8, SveUint8Ty, 16, 8, false, false, false) +SVE_VECTOR_TYPE("__SVUint16_t", SveUint16, SveUint16Ty, 8, 16, false, false, false) +SVE_VECTOR_TYPE("__SVUint32_t", SveUint32, SveUint32Ty, 4, 32, false, false, false) +SVE_VECTOR_TYPE("__SVUint64_t", SveUint64, 
SveUint64Ty, 2, 64, false, false, false) -SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true) -SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true) -SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true) +SVE_VECTOR_TYPE("__SVBFloat16_t", SveBFloat16, SveBFloat16Ty, 8, 16, false, false, true) +SVE_VECTOR_TYPE("__SVFloat16_t", SveFloat16, SveFloat16Ty, 8, 16, true, true, false) +SVE_VECTOR_TYPE("__SVFloat32_t", SveFloat32, SveFloat32Ty, 4, 32, true, true, false) +SVE_VECTOR_TYPE("__SVFloat64_t", SveFloat64, SveFloat64Ty, 2, 64, true, true, false) SVE_PREDICATE_TYPE("__SVBool_t", SveBool, SveBoolTy, 16) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -48,6 +48,7 @@ // f: float // h: half-float // d: double +// b: bfloat // Typespec modifiers // ------------------ @@ -144,6 +145,7 @@ def EltTyBool16 : EltType<9>; def EltTyBool32 : EltType<10>; def EltTyBool64 : EltType<11>; +def EltTyBFloat16 : EltType<12>; class MemEltType<int val> { int Value = val; @@ -474,6 +476,9 @@ let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64)" in { def SVLD1RO : SInst<"svld1ro[_{2}]", "dPc", "csilUcUsUiUlhfd", MergeNone, "aarch64_sve_ld1ro">; } +let ArchGuard = "defined(__ARM_FEATURE_SVE_MATMUL_FP64) && defined(__ARM_FEATURE_BF16_SCALAR_ARITHMETIC)" in { + def SVLD1RO_BF : SInst<"svld1ro[_{2}]", "dPc", "b", MergeNone, "aarch64_sve_ld1ro">; +} //////////////////////////////////////////////////////////////////////////////// // Stores diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2131,7 +2131,8 @@ // Because the length is only known at runtime, we use a dummy value // of 0 for the static length. The alignment values are those defined // by the Procedure Call Standard for the Arm Architecture. 
-#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP) \ +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP, \ + IsBF) \ case BuiltinType::Id: \ Width = 0; \ Align = 128; \ @@ -3640,13 +3641,16 @@ unsigned NumElts) const { if (Target->hasAArch64SVETypes()) { uint64_t EltTySize = getTypeSize(EltTy); -#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP) \ +#define SVE_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP, \ + IsBF) \ if (!EltTy->isBooleanType() && \ ((EltTy->hasIntegerRepresentation() && \ EltTy->hasSignedIntegerRepresentation() == IsSigned) || \ - (EltTy->hasFloatingRepresentation() && IsFP)) && \ - EltTySize == ElBits && NumElts == NumEls) \ - return SingletonId; + (EltTy->hasFloatingRepresentation() && IsFP) || \ + (EltTy->isBFloat16Type() && IsBF)) && \ + EltTySize == ElBits && NumElts == NumEls) { \ + return SingletonId; \ + } #define SVE_PREDICATE_TYPE(Name, Id, SingletonId, NumEls) \ if (EltTy->isBooleanType() && NumElts == NumEls) \ return SingletonId; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7556,6 +7556,9 @@ case SVETypeFlags::EltTyFloat64: return Builder.getDoubleTy(); + case SVETypeFlags::EltTyBFloat16: + return Builder.getBFloatTy(); + case SVETypeFlags::EltTyBool8: case SVETypeFlags::EltTyBool16: case SVETypeFlags::EltTyBool32: @@ -7616,6 +7619,8 @@ case SVETypeFlags::EltTyFloat16: return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8); + case SVETypeFlags::EltTyBFloat16: + return llvm::ScalableVectorType::get(Builder.getBFloatTy(), 8); case SVETypeFlags::EltTyFloat32: return llvm::ScalableVectorType::get(Builder.getFloatTy(), 4); case SVETypeFlags::EltTyFloat64: diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ 
b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -570,6 +570,9 @@ case BuiltinType::SveBool: return llvm::VectorType::get(llvm::IntegerType::get(getLLVMContext(), 1), {16, true}); + case BuiltinType::SveBFloat16: + return llvm::ScalableVectorType::get( + llvm::Type::getBFloatTy(getLLVMContext()), 8); break; case BuiltinType::Dependent: #define BUILTIN_TYPE(Id, SingletonId) diff --git a/clang/test/AST/ast-dump-aarch64-sve-types.c b/clang/test/AST/ast-dump-aarch64-sve-types.c --- a/clang/test/AST/ast-dump-aarch64-sve-types.c +++ b/clang/test/AST/ast-dump-aarch64-sve-types.c @@ -25,6 +25,9 @@ // CHECK: TypedefDecl {{.*}} implicit __SVUint64_t '__SVUint64_t' // CHECK-NEXT: -BuiltinType {{.*}} '__SVUint64_t' +// CHECK: TypedefDecl {{.*}} implicit __SVBFloat16_t '__SVBFloat16_t' +// CHECK-NEXT: -BuiltinType {{.*}} '__SVBFloat16_t' + // CHECK: TypedefDecl {{.*}} implicit __SVFloat16_t '__SVFloat16_t' // CHECK-NEXT: -BuiltinType {{.*}} '__SVFloat16_t' diff --git a/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-intrinsics/acle_sve_ld1ro-bfloat.c @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE_MATMUL_FP64 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -D__ARM_FEATURE_SVE_MATMUL_FP64 -D__ARM_FEATURE_BF16_SCALAR_ARITHMETIC -D__ARM_FEATURE_SVE -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +bf16 -fallow-half-arguments-and-returns -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s + +#include <arm_sve.h> + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. 
+#define SVE_ACLE_FUNC(A1, A2_UNUSED, A3, A4_UNUSED) A1##A3 +#else +#define SVE_ACLE_FUNC(A1, A2, A3, A4) A1##A2##A3##A4 +#endif + +svbfloat16_t test_svld1ro_bf16(svbool_t pg, const bfloat16_t *base) { + // CHECK-LABEL: test_svld1ro_bf16 + // CHECK: %[[PG:.*]] = call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %pg) + // CHECK: %[[INTRINSIC:.*]] = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1ro.nxv8bf16(<vscale x 8 x i1> %[[PG]], bfloat* %base) + // CHECK: ret <vscale x 8 x bfloat> %[[INTRINSIC]] + return SVE_ACLE_FUNC(svld1ro, _bf16, , )(pg, base); +} diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5393,6 +5393,7 @@ "__SVUint32_t", "__SVUint64_t", "__SVFloat16_t", + "__SVBFloat16_t", "__SVFloat32_t", "__SVFloat64_t", "__SVBool_t" diff --git a/clang/unittests/AST/SizelessTypesTest.cpp b/clang/unittests/AST/SizelessTypesTest.cpp --- a/clang/unittests/AST/SizelessTypesTest.cpp +++ b/clang/unittests/AST/SizelessTypesTest.cpp @@ -42,6 +42,8 @@ ASSERT_TRUE(Ctx.SveFloat32Ty->isSizelessBuiltinType()); ASSERT_TRUE(Ctx.SveFloat64Ty->isSizelessBuiltinType()); + ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessBuiltinType()); + ASSERT_TRUE(Ctx.SveBoolTy->isSizelessBuiltinType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessBuiltinType()); @@ -70,6 +72,8 @@ ASSERT_TRUE(Ctx.SveFloat32Ty->isSizelessType()); ASSERT_TRUE(Ctx.SveFloat64Ty->isSizelessType()); + ASSERT_TRUE(Ctx.SveBFloat16Ty->isSizelessType()); + ASSERT_TRUE(Ctx.SveBoolTy->isSizelessType()); ASSERT_FALSE(Ctx.VoidTy->isSizelessType()); diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -65,7 +65,7 @@ class SVEType { TypeSpec TS; - bool Float, Signed, Immediate, Void, Constant, Pointer; + bool Float, Signed, Immediate, Void, Constant, Pointer, BFloat; bool DefaultType, IsScalable, Predicate, PredicatePattern, PrefetchOp; unsigned 
Bitwidth, ElementBitwidth, NumVectors; @@ -74,9 +74,9 @@ SVEType(TypeSpec TS, char CharMod) : TS(TS), Float(false), Signed(true), Immediate(false), Void(false), - Constant(false), Pointer(false), DefaultType(false), IsScalable(true), - Predicate(false), PredicatePattern(false), PrefetchOp(false), - Bitwidth(128), ElementBitwidth(~0U), NumVectors(1) { + Constant(false), Pointer(false), BFloat(false), DefaultType(false), + IsScalable(true), Predicate(false), PredicatePattern(false), + PrefetchOp(false), Bitwidth(128), ElementBitwidth(~0U), NumVectors(1) { if (!TS.empty()) applyTypespec(); applyModifier(CharMod); @@ -93,9 +93,10 @@ bool isVoid() const { return Void & !Pointer; } bool isDefault() const { return DefaultType; } bool isFloat() const { return Float; } - bool isInteger() const { return !Float && !Predicate; } + bool isBFloat() const { return BFloat; } + bool isInteger() const { return !Float && !Predicate && !BFloat; } bool isScalarPredicate() const { - return !Float && Predicate && NumVectors == 0; + return !BFloat && !Float && Predicate && NumVectors == 0; } bool isPredicateVector() const { return Predicate; } bool isPredicatePattern() const { return PredicatePattern; } @@ -362,7 +363,7 @@ if (isVoidPointer()) S += "v"; - else if (!Float) + else if (!isFloat() && !isBFloat()) switch (ElementBitwidth) { case 1: S += "b"; break; case 8: S += "c"; break; @@ -372,15 +373,20 @@ case 128: S += "LLLi"; break; default: llvm_unreachable("Unhandled case!"); } - else + else if (isFloat()) switch (ElementBitwidth) { case 16: S += "h"; break; case 32: S += "f"; break; case 64: S += "d"; break; default: llvm_unreachable("Unhandled case!"); } + else if (isBFloat()) + switch (ElementBitwidth) { + case 16: S += "y"; break; + default: llvm_unreachable("Unhandled case!"); + } - if (!isFloat()) { + if (!isFloat() && !isBFloat()) { if ((isChar() || isPointer()) && !isVoidPointer()) { // Make chars and typed pointers explicitly signed. 
if (Signed) @@ -421,13 +427,15 @@ else { if (isScalableVector()) S += "sv"; - if (!Signed && !Float) + if (!Signed && !Float && !isBFloat()) S += "u"; if (Float) S += "float"; else if (isScalarPredicate() || isPredicateVector()) S += "bool"; + else if (isBFloat()) + S += "bfloat"; else S += "int"; @@ -480,6 +488,10 @@ Float = true; ElementBitwidth = 64; break; + case 'b': + BFloat = true; + ElementBitwidth = 16; + break; default: llvm_unreachable("Unhandled type code!"); } @@ -524,6 +536,7 @@ case 'P': Signed = true; Float = false; + BFloat = false; Predicate = true; Bitwidth = 16; ElementBitwidth = 1; @@ -774,7 +787,6 @@ BaseTypeSpec(BT), Class(Class), Guard(Guard.str()), MergeSuffix(MergeSuffix.str()), BaseType(BT, 'd'), Flags(Flags), ImmChecks(Checks.begin(), Checks.end()) { - // Types[0] is the return value. for (unsigned I = 0; I < Proto.size(); ++I) { SVEType T(BaseTypeSpec, Proto[I]); @@ -849,6 +861,8 @@ TypeCode = T.isSigned() ? 's' : 'u'; else if (T.isPredicateVector()) TypeCode = 'b'; + else if (T.isBFloat()) + TypeCode = "bf"; else TypeCode = 'f'; Ret.replace(Pos, NumChars, TypeCode + utostr(T.getElementSizeInBits())); @@ -924,6 +938,15 @@ } } + if (T.isBFloat()) { + switch (T.getElementSizeInBits()) { + case 16: + return encodeEltType("EltTyBFloat16"); + default: + llvm_unreachable("Unhandled float element bitwidth!"); + } + } + if (T.isPredicateVector()) { switch (T.getElementSizeInBits()) { case 8: @@ -1069,6 +1092,12 @@ OS << "typedef __SVUint32_t svuint32_t;\n"; OS << "typedef __SVUint64_t svuint64_t;\n"; OS << "typedef __SVFloat16_t svfloat16_t;\n"; + OS << "typedef __SVBFloat16_t svbfloat16_t;\n\n"; + + OS << "#ifdef __ARM_FEATURE_BF16_SCALAR_ARITHMETIC\n"; + OS << "typedef __bf16 bfloat16_t;\n"; + OS << "#endif\n\n"; + OS << "typedef __SVFloat32_t svfloat32_t;\n"; OS << "typedef __SVFloat64_t svfloat64_t;\n"; OS << "typedef __SVBool_t svbool_t;\n\n";