Index: clang/include/clang/AST/ASTContext.h =================================================================== --- clang/include/clang/AST/ASTContext.h +++ clang/include/clang/AST/ASTContext.h @@ -1099,7 +1099,7 @@ CanQualType SatUnsignedShortFractTy, SatUnsignedFractTy, SatUnsignedLongFractTy; CanQualType HalfTy; // [OpenCL 6.1.1.1], ARM NEON - CanQualType BFloat16Ty; + CanQualType BFloat16Ty; // ISO/IEC/IEEE 60559. CanQualType Float16Ty; // C11 extension ISO/IEC TS 18661-3 CanQualType VoidPtrTy, NullPtrTy; CanQualType DependentTy, OverloadTy, BoundMemberTy, UnknownAnyTy; Index: clang/include/clang/Basic/DiagnosticSemaKinds.td =================================================================== --- clang/include/clang/Basic/DiagnosticSemaKinds.td +++ clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -8748,8 +8748,6 @@ def err_nullptr_cast : Error< "cannot cast an object of type %select{'nullptr_t' to %1|%1 to 'nullptr_t'}0" >; -def err_cast_to_bfloat16 : Error<"cannot type-cast to __bf16">; -def err_cast_from_bfloat16 : Error<"cannot type-cast from __bf16">; def err_typecheck_expect_scalar_operand : Error< "operand of type %0 where arithmetic or pointer type is required">; def err_typecheck_cond_incompatible_operands : Error< Index: clang/include/clang/Basic/FPOptions.def =================================================================== --- clang/include/clang/Basic/FPOptions.def +++ clang/include/clang/Basic/FPOptions.def @@ -26,4 +26,5 @@ OPTION(AllowApproxFunc, bool, 1, AllowReciprocal) OPTION(FPEvalMethod, LangOptions::FPEvalMethodKind, 2, AllowApproxFunc) OPTION(Float16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) +OPTION(BFloat16ExcessPrecision, LangOptions::ExcessPrecisionKind, 2, FPEvalMethod) #undef OPTION Index: clang/include/clang/Basic/LangOptions.def =================================================================== --- clang/include/clang/Basic/LangOptions.def +++ clang/include/clang/Basic/LangOptions.def @@ -318,6 +318,7 @@ BENIGN_ENUM_LANGOPT(FPExceptionMode, FPExceptionModeKind, 2, FPE_Default, "FP Exception Behavior Mode type") BENIGN_ENUM_LANGOPT(FPEvalMethod, FPEvalMethodKind, 2, FEM_UnsetOnCommandLine, "FP type used for floating point arithmetic") ENUM_LANGOPT(Float16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for floating point arithmetic") +ENUM_LANGOPT(BFloat16ExcessPrecision, ExcessPrecisionKind, 2, FPP_Standard, "Intermediate truncation behavior for floating point arithmetic") LANGOPT(NoBitFieldTypeAlign , 1, 0, "bit-field type alignment") LANGOPT(HexagonQdsp6Compat , 1, 0, "hexagon-qdsp6 backward compatibility") LANGOPT(ObjCAutoRefCount , 1, 0, "Objective-C automated reference counting") Index: clang/include/clang/Basic/TargetInfo.h =================================================================== --- clang/include/clang/Basic/TargetInfo.h +++ clang/include/clang/Basic/TargetInfo.h @@ -219,6 +219,8 @@ bool HasFloat128; bool HasFloat16; bool HasBFloat16; + bool HasFullBFloat16; // True if the backend supports native bfloat16 + // arithmetic. bool HasIbm128; bool HasLongDouble; bool HasFPReturn; @@ -648,7 +650,13 @@ virtual bool hasFloat16Type() const { return HasFloat16; } /// Determine whether the _BFloat16 type is supported on this target. - virtual bool hasBFloat16Type() const { return HasBFloat16; } + virtual bool hasBFloat16Type() const { + return HasBFloat16 || HasFullBFloat16; + } + + /// Determine whether the BFloat type is fully supported on this target, i.e + /// arithemtic operations. + virtual bool hasFullBFloat16Type() const { return HasFullBFloat16; } /// Determine whether the __ibm128 type is supported on this target. virtual bool hasIbm128Type() const { return HasIbm128; } Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -1637,6 +1637,15 @@ Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">, NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>, MarshallingInfoEnum, "FPP_Standard">; +def fbfloat16_excess_precision_EQ : Joined<["-"], "fbfloat16-excess-precision=">, + Group, Flags<[CC1Option, NoDriverOption]>, + HelpText<"Allows control over excess precision on targets where native " + "support for BFloat16 precision types is not available. By default, excess " + "precision is used to calculate intermediate results following the " + "rules specified in ISO C99.">, + Values<"standard,fast,none">, NormalizedValuesScope<"LangOptions">, + NormalizedValues<["FPP_Standard", "FPP_Fast", "FPP_None"]>, + MarshallingInfoEnum, "FPP_Standard">; def : Flag<["-"], "fexpensive-optimizations">, Group; def : Flag<["-"], "fno-expensive-optimizations">, Group; def fextdirs_EQ : Joined<["-"], "fextdirs=">, Group; Index: clang/lib/AST/Type.cpp =================================================================== --- clang/lib/AST/Type.cpp +++ clang/lib/AST/Type.cpp @@ -1487,7 +1487,13 @@ bool QualType::UseExcessPrecision(const ASTContext &Ctx) { const BuiltinType *BT = getTypePtr()->getAs(); - if (BT) { + if (!BT) { + const VectorType *VT = getTypePtr()->getAs(); + if (VT) { + QualType ElementType = VT->getElementType(); + return ElementType.UseExcessPrecision(Ctx); + } + } else { switch (BT->getKind()) { case BuiltinType::Kind::Float16: { const TargetInfo &TI = Ctx.getTargetInfo(); @@ -1496,7 +1502,15 @@ Ctx.getLangOpts().ExcessPrecisionKind::FPP_None) return true; return false; - } + } break; + case BuiltinType::Kind::BFloat16: { + const TargetInfo &TI = Ctx.getTargetInfo(); + if (TI.hasBFloat16Type() && !TI.hasFullBFloat16Type() && + Ctx.getLangOpts().getBFloat16ExcessPrecision() != + Ctx.getLangOpts().ExcessPrecisionKind::FPP_None) + return true; + return false; + } break; default: return false; } @@ -2182,9 +2196,8 @@ bool Type::isArithmeticType() const { if (const auto *BT = dyn_cast(CanonicalType)) - return BT->getKind() >= BuiltinType::Bool && - BT->getKind() <= BuiltinType::Ibm128 && - BT->getKind() != BuiltinType::BFloat16; + return BT->getKind() >= BuiltinType::Bool && + BT->getKind() <= BuiltinType::Ibm128; if (const auto *ET = dyn_cast(CanonicalType)) // GCC allows forward declaration of enum types (forbid by C99 6.7.2.3p2). // If a body isn't seen by the time we get here, return false. Index: clang/lib/Basic/TargetInfo.cpp =================================================================== --- clang/lib/Basic/TargetInfo.cpp +++ clang/lib/Basic/TargetInfo.cpp @@ -64,6 +64,7 @@ HasIbm128 = false; HasFloat16 = false; HasBFloat16 = false; + HasFullBFloat16 = false; HasLongDouble = true; HasFPReturn = true; HasStrictFP = false; Index: clang/lib/Basic/Targets/AMDGPU.h =================================================================== --- clang/lib/Basic/Targets/AMDGPU.h +++ clang/lib/Basic/Targets/AMDGPU.h @@ -118,7 +118,7 @@ } bool hasBFloat16Type() const override { return isAMDGCN(getTriple()); } - const char *getBFloat16Mangling() const override { return "u6__bf16"; }; + const char *getBFloat16Mangling() const override { return "DF16b"; }; std::string_view getClobbers() const override { return ""; } Index: clang/lib/Basic/Targets/ARM.cpp =================================================================== --- clang/lib/Basic/Targets/ARM.cpp +++ clang/lib/Basic/Targets/ARM.cpp @@ -514,6 +514,7 @@ HasFloat16 = true; ARMCDECoprocMask = 0; HasBFloat16 = false; + HasFullBFloat16 = false; FPRegsDisabled = false; // This does not diagnose illegal cases like having both @@ -596,6 +597,8 @@ } else if (Feature == "+pacbti") { HasPAC = 1; HasBTI = 1; + } else if (Feature == "+fullbf16") { + HasFullBFloat16 = true; } } Index: clang/lib/Basic/Targets/NVPTX.h =================================================================== --- clang/lib/Basic/Targets/NVPTX.h +++ clang/lib/Basic/Targets/NVPTX.h @@ -181,7 +181,7 @@ bool hasBitIntType() const override { return true; } bool hasBFloat16Type() const override { return true; } - const char *getBFloat16Mangling() const override { return "u6__bf16"; }; + const char *getBFloat16Mangling() const override { return "DF16b"; }; }; } // namespace targets } // namespace clang Index: clang/lib/Basic/Targets/X86.h =================================================================== --- clang/lib/Basic/Targets/X86.h +++ clang/lib/Basic/Targets/X86.h @@ -417,7 +417,7 @@ return getPointerWidthV(AddrSpace); } - const char *getBFloat16Mangling() const override { return "u6__bf16"; }; + const char *getBFloat16Mangling() const override { return "DF16b"; }; }; // X86-32 generic target Index: clang/lib/Basic/Targets/X86.cpp =================================================================== --- clang/lib/Basic/Targets/X86.cpp +++ clang/lib/Basic/Targets/X86.cpp @@ -359,6 +359,8 @@ HasCRC32 = true; } else if (Feature == "+x87") { HasX87 = true; + } else if (Feature == "+fullbf16") { + HasFullBFloat16 = true; } X86SSEEnum Level = llvm::StringSwitch(Feature) @@ -1117,6 +1119,7 @@ .Case("xsavec", HasXSAVEC) .Case("xsaves", HasXSAVES) .Case("xsaveopt", HasXSAVEOPT) + .Case("fullbf16", HasFullBFloat16) .Default(false); } Index: clang/lib/CodeGen/CGExprScalar.cpp =================================================================== --- clang/lib/CodeGen/CGExprScalar.cpp +++ clang/lib/CodeGen/CGExprScalar.cpp @@ -814,13 +814,21 @@ Value *(ScalarExprEmitter::*F)(const BinOpInfo &)); QualType getPromotionType(QualType Ty) { + const auto &Ctx = CGF.getContext(); if (auto *CT = Ty->getAs()) { QualType ElementType = CT->getElementType(); - if (ElementType.UseExcessPrecision(CGF.getContext())) - return CGF.getContext().getComplexType(CGF.getContext().FloatTy); + if (ElementType.UseExcessPrecision(Ctx)) + return Ctx.getComplexType(Ctx.FloatTy); } - if (Ty.UseExcessPrecision(CGF.getContext())) - return CGF.getContext().FloatTy; + + if (Ty.UseExcessPrecision(Ctx)) { + if (auto *VT = Ty->getAs()) { + unsigned NumElements = VT->getNumElements(); + return Ctx.getVectorType(Ctx.FloatTy, NumElements, VT->getVectorKind()); + } + return Ctx.FloatTy; + } + return QualType(); } Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2777,6 +2777,7 @@ FPContract = "on"; bool StrictFPModel = false; StringRef Float16ExcessPrecision = ""; + StringRef BFloat16ExcessPrecision = ""; if (const Arg *A = Args.getLastArg(options::OPT_flimited_precision_EQ)) { CmdArgs.push_back("-mlimit-float-precision"); @@ -2992,6 +2993,7 @@ D.Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } + BFloat16ExcessPrecision = Float16ExcessPrecision; break; } case options::OPT_ffinite_math_only: @@ -3167,6 +3169,9 @@ if (!Float16ExcessPrecision.empty()) CmdArgs.push_back(Args.MakeArgString("-ffloat16-excess-precision=" + Float16ExcessPrecision)); + if (!BFloat16ExcessPrecision.empty()) + CmdArgs.push_back(Args.MakeArgString("-fbfloat16-excess-precision=" + + BFloat16ExcessPrecision)); ParseMRecip(D, Args, CmdArgs); Index: clang/lib/Sema/SemaCast.cpp =================================================================== --- clang/lib/Sema/SemaCast.cpp +++ clang/lib/Sema/SemaCast.cpp @@ -3092,20 +3092,6 @@ return; } - // Can't cast to or from bfloat - if (DestType->isBFloat16Type() && !SrcType->isBFloat16Type()) { - Self.Diag(SrcExpr.get()->getExprLoc(), diag::err_cast_to_bfloat16) - << SrcExpr.get()->getSourceRange(); - SrcExpr = ExprError(); - return; - } - if (SrcType->isBFloat16Type() && !DestType->isBFloat16Type()) { - Self.Diag(SrcExpr.get()->getExprLoc(), diag::err_cast_from_bfloat16) - << SrcExpr.get()->getSourceRange(); - SrcExpr = ExprError(); - return; - } - // If either type is a pointer, the other type has to be either an // integer or a pointer. if (!DestType->isArithmeticType()) { Index: clang/lib/Sema/SemaExpr.cpp =================================================================== --- clang/lib/Sema/SemaExpr.cpp +++ clang/lib/Sema/SemaExpr.cpp @@ -10785,10 +10785,6 @@ const VectorType *RHSVecType = RHSType->getAs(); assert(LHSVecType || RHSVecType); - if ((LHSVecType && LHSVecType->getElementType()->isBFloat16Type()) || - (RHSVecType && RHSVecType->getElementType()->isBFloat16Type())) - return ReportInvalid ? InvalidOperands(Loc, LHS, RHS) : QualType(); - // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && Index: clang/lib/Sema/SemaOverload.cpp =================================================================== --- clang/lib/Sema/SemaOverload.cpp +++ clang/lib/Sema/SemaOverload.cpp @@ -1995,8 +1995,11 @@ // if their representation is different until there is back end support // We of course allow this conversion if long double is really double. - // Conversions between bfloat and other floats are not permitted. - if (FromType == S.Context.BFloat16Ty || ToType == S.Context.BFloat16Ty) + // Conversions between bfloat16 and float16 is currently not supported. + if ((FromType->isBFloat16Type() && + (ToType->isFloat16Type() || ToType->isHalfType())) || + (ToType->isBFloat16Type() && + (FromType->isFloat16Type() || FromType->isHalfType()))) return false; // Conversions between IEEE-quad and IBM-extended semantics are not @@ -2017,9 +2020,6 @@ ToType->isIntegralType(S.Context)) || (FromType->isIntegralOrUnscopedEnumerationType() && ToType->isRealFloatingType())) { - // Conversions between bfloat and int are not permitted. - if (FromType->isBFloat16Type() || ToType->isBFloat16Type()) - return false; // Floating-integral conversions (C++ 4.9). SCS.Second = ICK_Floating_Integral; @@ -2048,9 +2048,8 @@ // Compatible conversions (Clang extension for C function overloading) SCS.Second = ICK_Compatible_Conversion; FromType = ToType.getUnqualifiedType(); - } else if (IsTransparentUnionStandardConversion(S, From, ToType, - InOverloadResolution, - SCS, CStyle)) { + } else if (IsTransparentUnionStandardConversion( + S, From, ToType, InOverloadResolution, SCS, CStyle)) { SCS.Second = ICK_TransparentUnionConversion; FromType = ToType; } else if (tryAtomicConversion(S, From, ToType, InOverloadResolution, SCS, Index: clang/test/CodeGen/X86/avx512bf16-error.c =================================================================== --- clang/test/CodeGen/X86/avx512bf16-error.c +++ clang/test/CodeGen/X86/avx512bf16-error.c @@ -7,7 +7,6 @@ #include -// expected-error@+4 {{invalid operands to binary expression ('__bfloat16' (aka '__bf16') and '__bfloat16')}} // expected-warning@+2 3 {{'__bfloat16' is deprecated: use __bf16 instead}} // expected-note@* 3 {{'__bfloat16' has been explicitly marked deprecated here}} __bfloat16 bar(__bfloat16 a, __bfloat16 b) { Index: clang/test/CodeGen/X86/bfloat-mangle.cpp =================================================================== --- clang/test/CodeGen/X86/bfloat-mangle.cpp +++ clang/test/CodeGen/X86/bfloat-mangle.cpp @@ -3,6 +3,6 @@ // RUN: %clang_cc1 -triple i386-windows-msvc -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s --check-prefixes=WINDOWS // RUN: %clang_cc1 -triple x86_64-windows-msvc -target-feature +sse2 -emit-llvm -o - %s | FileCheck %s --check-prefixes=WINDOWS -// LINUX: define {{.*}}void @_Z3foou6__bf16(bfloat noundef %b) +// LINUX: define {{.*}}void @_Z3fooDF16b(bfloat noundef %b) // WINDOWS: define {{.*}}void @"?foo@@YAXU__bf16@__clang@@@Z"(bfloat noundef %b) void foo(__bf16 b) {} Index: clang/test/CodeGen/X86/bfloat16.cpp =================================================================== --- /dev/null +++ clang/test/CodeGen/X86/bfloat16.cpp @@ -0,0 +1,145 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +fullbf16 -S -emit-llvm %s -o - | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -S -emit-llvm %s -o - | FileCheck -check-prefix=CHECK-NBF16 %s + +// CHECK-LABEL: define dso_local void @_Z11test_scalarDF16bDF16b +// CHECK-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: [[C:%.*]] = alloca bfloat, align 2 +// CHECK-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[ADD:%.*]] = fadd bfloat [[TMP0]], [[TMP1]] +// CHECK-NEXT: store bfloat [[ADD]], ptr [[C]], align 2 +// CHECK-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[SUB:%.*]] = fsub bfloat [[TMP2]], [[TMP3]] +// CHECK-NEXT: store bfloat [[SUB]], ptr [[C]], align 2 +// CHECK-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[MUL:%.*]] = fmul bfloat [[TMP4]], [[TMP5]] +// CHECK-NEXT: store bfloat [[MUL]], ptr [[C]], align 2 +// CHECK-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NEXT: [[TMP7:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NEXT: [[DIV:%.*]] = fdiv bfloat [[TMP6]], [[TMP7]] +// CHECK-NEXT: store bfloat [[DIV]], ptr [[C]], align 2 +// CHECK-NEXT: ret void +// +// CHECK-NBF16-LABEL: define dso_local void @_Z11test_scalarDF16bDF16b +// CHECK-NBF16-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NBF16: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NBF16-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NBF16-NEXT: [[C:%.*]] = alloca bfloat, align 2 +// CHECK-NBF16-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NBF16-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float +// CHECK-NBF16-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT1:%.*]] = fpext bfloat [[TMP1]] to float +// CHECK-NBF16-NEXT: [[ADD:%.*]] = fadd float [[EXT]], [[EXT1]] +// CHECK-NBF16-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to bfloat +// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION]], ptr [[C]], align 2 +// CHECK-NBF16-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT2:%.*]] = fpext bfloat [[TMP2]] to float +// CHECK-NBF16-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT3:%.*]] = fpext bfloat [[TMP3]] to float +// CHECK-NBF16-NEXT: [[SUB:%.*]] = fsub float [[EXT2]], [[EXT3]] +// CHECK-NBF16-NEXT: [[UNPROMOTION4:%.*]] = fptrunc float [[SUB]] to bfloat +// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION4]], ptr [[C]], align 2 +// CHECK-NBF16-NEXT: [[TMP4:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT5:%.*]] = fpext bfloat [[TMP4]] to float +// CHECK-NBF16-NEXT: [[TMP5:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT6:%.*]] = fpext bfloat [[TMP5]] to float +// CHECK-NBF16-NEXT: [[MUL:%.*]] = fmul float [[EXT5]], [[EXT6]] +// CHECK-NBF16-NEXT: [[UNPROMOTION7:%.*]] = fptrunc float [[MUL]] to bfloat +// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION7]], ptr [[C]], align 2 +// CHECK-NBF16-NEXT: [[TMP6:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT8:%.*]] = fpext bfloat [[TMP6]] to float +// CHECK-NBF16-NEXT: [[TMP7:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NBF16-NEXT: [[EXT9:%.*]] = fpext bfloat [[TMP7]] to float +// CHECK-NBF16-NEXT: [[DIV:%.*]] = fdiv float [[EXT8]], [[EXT9]] +// CHECK-NBF16-NEXT: [[UNPROMOTION10:%.*]] = fptrunc float [[DIV]] to bfloat +// CHECK-NBF16-NEXT: store bfloat [[UNPROMOTION10]], ptr [[C]], align 2 +// CHECK-NBF16-NEXT: ret void +// +void test_scalar(__bf16 a, __bf16 b) { + __bf16 c; + c = a + b; + c = a - b; + c = a * b; + c = a / b; +} + +typedef __bf16 v8bfloat16 __attribute__((__vector_size__(16))); + +// CHECK-LABEL: define dso_local void @_Z11test_vectorDv8_DF16bS_ +// CHECK-SAME: (<8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: [[C:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16 +// CHECK-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[ADD:%.*]] = fadd <8 x bfloat> [[TMP0]], [[TMP1]] +// CHECK-NEXT: store <8 x bfloat> [[ADD]], ptr [[C]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[SUB:%.*]] = fsub <8 x bfloat> [[TMP2]], [[TMP3]] +// CHECK-NEXT: store <8 x bfloat> [[SUB]], ptr [[C]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x bfloat> [[TMP4]], [[TMP5]] +// CHECK-NEXT: store <8 x bfloat> [[MUL]], ptr [[C]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NEXT: [[TMP7:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NEXT: [[DIV:%.*]] = fdiv <8 x bfloat> [[TMP6]], [[TMP7]] +// CHECK-NEXT: store <8 x bfloat> [[DIV]], ptr [[C]], align 16 +// CHECK-NEXT: ret void +// +// CHECK-NBF16-LABEL: define dso_local void @_Z11test_vectorDv8_DF16bS_ +// CHECK-NBF16-SAME: (<8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NBF16: [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NBF16-NEXT: [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NBF16-NEXT: [[C:%.*]] = alloca <8 x bfloat>, align 16 +// CHECK-NBF16-NEXT: store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16 +// CHECK-NBF16-NEXT: store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[TMP0:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT:%.*]] = fpext <8 x bfloat> [[TMP0]] to <8 x float> +// CHECK-NBF16-NEXT: [[TMP1:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT1:%.*]] = fpext <8 x bfloat> [[TMP1]] to <8 x float> +// CHECK-NBF16-NEXT: [[ADD:%.*]] = fadd <8 x float> [[EXT]], [[EXT1]] +// CHECK-NBF16-NEXT: [[UNPROMOTION:%.*]] = fptrunc <8 x float> [[ADD]] to <8 x bfloat> +// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION]], ptr [[C]], align 16 +// CHECK-NBF16-NEXT: [[TMP2:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT2:%.*]] = fpext <8 x bfloat> [[TMP2]] to <8 x float> +// CHECK-NBF16-NEXT: [[TMP3:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT3:%.*]] = fpext <8 x bfloat> [[TMP3]] to <8 x float> +// CHECK-NBF16-NEXT: [[SUB:%.*]] = fsub <8 x float> [[EXT2]], [[EXT3]] +// CHECK-NBF16-NEXT: [[UNPROMOTION4:%.*]] = fptrunc <8 x float> [[SUB]] to <8 x bfloat> +// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION4]], ptr [[C]], align 16 +// CHECK-NBF16-NEXT: [[TMP4:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT5:%.*]] = fpext <8 x bfloat> [[TMP4]] to <8 x float> +// CHECK-NBF16-NEXT: [[TMP5:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT6:%.*]] = fpext <8 x bfloat> [[TMP5]] to <8 x float> +// CHECK-NBF16-NEXT: [[MUL:%.*]] = fmul <8 x float> [[EXT5]], [[EXT6]] +// CHECK-NBF16-NEXT: [[UNPROMOTION7:%.*]] = fptrunc <8 x float> [[MUL]] to <8 x bfloat> +// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION7]], ptr [[C]], align 16 +// CHECK-NBF16-NEXT: [[TMP6:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT8:%.*]] = fpext <8 x bfloat> [[TMP6]] to <8 x float> +// CHECK-NBF16-NEXT: [[TMP7:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16 +// CHECK-NBF16-NEXT: [[EXT9:%.*]] = fpext <8 x bfloat> [[TMP7]] to <8 x float> +// CHECK-NBF16-NEXT: [[DIV:%.*]] = fdiv <8 x float> [[EXT8]], [[EXT9]] +// CHECK-NBF16-NEXT: [[UNPROMOTION10:%.*]] = fptrunc <8 x float> [[DIV]] to <8 x bfloat> +// CHECK-NBF16-NEXT: store <8 x bfloat> [[UNPROMOTION10]], ptr [[C]], align 16 +// CHECK-NBF16-NEXT: ret void +// +void test_vector(v8bfloat16 a, v8bfloat16 b) { + v8bfloat16 c; + c = a + b; + c = a - b; + c = a * b; + c = a / b; +} Index: clang/test/CodeGen/X86/fexcess-precision-bfloat16.c =================================================================== --- /dev/null +++ clang/test/CodeGen/X86/fexcess-precision-bfloat16.c @@ -0,0 +1,360 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \ +// RUN: -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=source -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-NO-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=double -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=fast -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=standard -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -emit-llvm -ffp-eval-method=extended -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-EXT-FP80 %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -ffp-contract=on -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -ffp-contract=on -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=source -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=double -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-DBL %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -fmath-errno -ffp-contract=on -fno-rounding-math \ +// RUN: -ffp-eval-method=extended -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-CONTRACT-EXT %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none \ +// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ +// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ +// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown \ +// RUN: -fbfloat16-excess-precision=none -target-feature +fullbf16 \ +// RUN: -fapprox-func -fmath-errno -fno-signed-zeros -mreassociate \ +// RUN: -freciprocal-math -ffp-contract=on -fno-rounding-math \ +// RUN: -funsafe-math-optimizations -emit-llvm -o - %s \ +// RUN: | FileCheck -check-prefixes=CHECK-UNSAFE %s + +// CHECK-EXT-LABEL: define dso_local bfloat @f +// CHECK-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-NEXT: entry: +// CHECK-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT:%.*]] = fpext bfloat [[TMP0]] to float +// CHECK-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT1:%.*]] = fpext bfloat [[TMP1]] to float +// CHECK-EXT-NEXT: [[MUL:%.*]] = fmul float [[EXT]], [[EXT1]] +// CHECK-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT2:%.*]] = fpext bfloat [[TMP2]] to float +// CHECK-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-EXT-NEXT: [[EXT3:%.*]] = fpext bfloat [[TMP3]] to float +// CHECK-EXT-NEXT: [[MUL4:%.*]] = fmul float [[EXT2]], [[EXT3]] +// CHECK-EXT-NEXT: [[ADD:%.*]] = fadd float [[MUL]], [[MUL4]] +// CHECK-EXT-NEXT: [[UNPROMOTION:%.*]] = fptrunc float [[ADD]] to bfloat +// CHECK-EXT-NEXT: ret bfloat [[UNPROMOTION]] +// +// CHECK-NO-EXT-LABEL: define dso_local bfloat @f +// CHECK-NO-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NO-EXT-NEXT: entry: +// CHECK-NO-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NO-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NO-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NO-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-NO-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[MUL:%.*]] = fmul bfloat [[TMP0]], [[TMP1]] +// CHECK-NO-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-NO-EXT-NEXT: [[MUL1:%.*]] = fmul bfloat [[TMP2]], [[TMP3]] +// CHECK-NO-EXT-NEXT: [[ADD:%.*]] = fadd bfloat [[MUL]], [[MUL1]] +// CHECK-NO-EXT-NEXT: ret bfloat [[ADD]] +// +// CHECK-EXT-DBL-LABEL: define dso_local bfloat @f +// CHECK-EXT-DBL-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-DBL-NEXT: entry: +// CHECK-EXT-DBL-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-DBL-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-DBL-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-DBL-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-DBL-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to double +// CHECK-EXT-DBL-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to double +// CHECK-EXT-DBL-NEXT: [[MUL:%.*]] = fmul double [[CONV]], [[CONV1]] +// CHECK-EXT-DBL-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to double +// CHECK-EXT-DBL-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-EXT-DBL-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to double +// CHECK-EXT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-EXT-DBL-NEXT: [[ADD:%.*]] = fadd double [[MUL]], [[MUL4]] +// CHECK-EXT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[ADD]] to bfloat +// CHECK-EXT-DBL-NEXT: ret bfloat [[CONV5]] +// +// CHECK-EXT-FP80-LABEL: define dso_local bfloat @f +// CHECK-EXT-FP80-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-EXT-FP80-NEXT: entry: +// CHECK-EXT-FP80-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-FP80-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-FP80-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-FP80-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-EXT-FP80-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[MUL:%.*]] = fmul x86_fp80 [[CONV]], [[CONV1]] +// CHECK-EXT-FP80-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-EXT-FP80-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to x86_fp80 +// CHECK-EXT-FP80-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-EXT-FP80-NEXT: [[ADD:%.*]] = fadd x86_fp80 [[MUL]], [[MUL4]] +// CHECK-EXT-FP80-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[ADD]] to bfloat +// CHECK-EXT-FP80-NEXT: ret bfloat [[CONV5]] +// +// CHECK-CONTRACT-LABEL: define dso_local bfloat @f +// CHECK-CONTRACT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-NEXT: entry: +// CHECK-CONTRACT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-NEXT: [[MUL1:%.*]] = fmul bfloat [[TMP2]], [[TMP3]] +// CHECK-CONTRACT-NEXT: [[TMP4:%.*]] = call bfloat @llvm.fmuladd.bf16(bfloat [[TMP0]], bfloat [[TMP1]], bfloat [[MUL1]]) +// CHECK-CONTRACT-NEXT: ret bfloat [[TMP4]] +// +// CHECK-CONTRACT-DBL-LABEL: define dso_local bfloat @f +// CHECK-CONTRACT-DBL-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-DBL-NEXT: entry: +// CHECK-CONTRACT-DBL-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-DBL-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-DBL-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to double +// CHECK-CONTRACT-DBL-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-DBL-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to double +// CHECK-CONTRACT-DBL-NEXT: [[MUL4:%.*]] = fmul double [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-DBL-NEXT: [[TMP4:%.*]] = call double @llvm.fmuladd.f64(double [[CONV]], double [[CONV1]], double [[MUL4]]) +// CHECK-CONTRACT-DBL-NEXT: [[CONV5:%.*]] = fptrunc double [[TMP4]] to bfloat +// CHECK-CONTRACT-DBL-NEXT: ret bfloat [[CONV5]] +// +// CHECK-CONTRACT-EXT-LABEL: define dso_local bfloat @f +// CHECK-CONTRACT-EXT-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CONTRACT-EXT-NEXT: entry: +// CHECK-CONTRACT-EXT-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-EXT-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-CONTRACT-EXT-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV:%.*]] = fpext bfloat [[TMP0]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV1:%.*]] = fpext bfloat [[TMP1]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV2:%.*]] = fpext bfloat [[TMP2]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-CONTRACT-EXT-NEXT: [[CONV3:%.*]] = fpext bfloat [[TMP3]] to x86_fp80 +// CHECK-CONTRACT-EXT-NEXT: [[MUL4:%.*]] = fmul x86_fp80 [[CONV2]], [[CONV3]] +// CHECK-CONTRACT-EXT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.fmuladd.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], x86_fp80 [[MUL4]]) +// CHECK-CONTRACT-EXT-NEXT: [[CONV5:%.*]] = fptrunc x86_fp80 [[TMP4]] to bfloat +// CHECK-CONTRACT-EXT-NEXT: ret bfloat [[CONV5]] +// +// CHECK-UNSAFE-LABEL: define dso_local bfloat @f +// CHECK-UNSAFE-SAME: (bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]], bfloat noundef [[C:%.*]], bfloat noundef [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-UNSAFE-NEXT: entry: +// CHECK-UNSAFE-NEXT: [[A_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-UNSAFE-NEXT: [[B_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-UNSAFE-NEXT: [[C_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-UNSAFE-NEXT: [[D_ADDR:%.*]] = alloca bfloat, align 2 +// CHECK-UNSAFE-NEXT: store bfloat [[A]], ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store bfloat [[B]], ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store bfloat [[C]], ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: store bfloat [[D]], ptr [[D_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[A_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP1:%.*]] = load bfloat, ptr [[B_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP2:%.*]] = load bfloat, ptr [[C_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[TMP3:%.*]] = load bfloat, ptr [[D_ADDR]], align 2 +// CHECK-UNSAFE-NEXT: [[MUL1:%.*]] = fmul reassoc nsz arcp afn bfloat [[TMP2]], [[TMP3]] +// CHECK-UNSAFE-NEXT: [[TMP4:%.*]] = call reassoc nsz arcp afn bfloat @llvm.fmuladd.bf16(bfloat [[TMP0]], bfloat [[TMP1]], bfloat [[MUL1]]) +// CHECK-UNSAFE-NEXT: ret bfloat [[TMP4]] +// +__bf16 f(__bf16 a, __bf16 b, __bf16 c, __bf16 d) { + return a * b + c * d; +} \ No newline at end of file Index: clang/test/CodeGenCUDA/amdgpu-bf16.cu =================================================================== --- clang/test/CodeGenCUDA/amdgpu-bf16.cu +++ clang/test/CodeGenCUDA/amdgpu-bf16.cu @@ -7,7 +7,7 @@ #include "Inputs/cuda.h" -// CHECK-LABEL: @_Z8test_argPu6__bf16u6__bf16( +// CHECK-LABEL: @_Z8test_argPDF16bDF16b( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) @@ -29,7 +29,7 @@ *out = bf16; } -// CHECK-LABEL: @_Z9test_loadPu6__bf16S_( +// CHECK-LABEL: @_Z9test_loadPDF16bS_( // CHECK-NEXT: entry: // CHECK-NEXT: [[OUT_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca ptr, align 8, addrspace(5) @@ -52,7 +52,7 @@ *out = bf16; } -// CHECK-LABEL: @_Z8test_retu6__bf16( +// CHECK-LABEL: @_Z8test_retDF16b( // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5) // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) @@ -66,7 +66,7 @@ return in; } -// CHECK-LABEL: @_Z9test_callu6__bf16( +// CHECK-LABEL: @_Z9test_callDF16b( // CHECK-NEXT: entry: // CHECK-NEXT: [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5) // CHECK-NEXT: [[IN_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5) @@ -74,7 +74,7 @@ // CHECK-NEXT: [[IN_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[IN_ADDR]] to ptr // CHECK-NEXT: store bfloat [[IN:%.*]], ptr [[IN_ADDR_ASCAST]], align 2 // CHECK-NEXT: [[TMP0:%.*]] = load bfloat, ptr [[IN_ADDR_ASCAST]], align 2 -// CHECK-NEXT: [[CALL:%.*]] = call contract noundef bfloat @_Z8test_retu6__bf16(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]] +// CHECK-NEXT: [[CALL:%.*]] = call contract noundef bfloat @_Z8test_retDF16b(bfloat noundef [[TMP0]]) #[[ATTR1:[0-9]+]] // CHECK-NEXT: ret bfloat [[CALL]] // __device__ __bf16 test_call( __bf16 in) { Index: clang/test/CodeGenCUDA/bf16.cu =================================================================== --- clang/test/CodeGenCUDA/bf16.cu +++ clang/test/CodeGenCUDA/bf16.cu @@ -6,12 +6,12 @@ #include "Inputs/cuda.h" -// CHECK-LABEL: .visible .func _Z8test_argPu6__bf16u6__bf16( -// CHECK: .param .b64 _Z8test_argPu6__bf16u6__bf16_param_0, -// CHECK: .param .b16 _Z8test_argPu6__bf16u6__bf16_param_1 +// CHECK-LABEL: .visible .func _Z8test_argPDF16bDF16b( +// CHECK: .param .b64 _Z8test_argPDF16bDF16b_param_0, +// CHECK: .param .b16 _Z8test_argPDF16bDF16b_param_1 // __device__ void test_arg(__bf16 *out, __bf16 in) { -// CHECK: ld.param.b16 %{{h.*}}, [_Z8test_argPu6__bf16u6__bf16_param_1]; +// CHECK: ld.param.b16 %{{h.*}}, [_Z8test_argPDF16bDF16b_param_1]; __bf16 bf16 = in; *out = bf16; // CHECK: st.b16 @@ -19,23 +19,23 @@ } -// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z8test_retu6__bf16( -// CHECK: .param .b16 _Z8test_retu6__bf16_param_0 +// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z8test_retDF16b( +// CHECK: .param .b16 _Z8test_retDF16b_param_0 __device__ __bf16 test_ret( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z8test_retu6__bf16_param_0]; +// CHECK: ld.param.b16 %h{{.*}}, [_Z8test_retDF16b_param_0]; return in; // CHECK: st.param.b16 [func_retval0+0], %h // CHECK: ret; } -// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z9test_callu6__bf16( -// CHECK: .param .b16 _Z9test_callu6__bf16_param_0 +// CHECK-LABEL: .visible .func (.param .b32 func_retval0) _Z9test_callDF16b( +// CHECK: .param .b16 _Z9test_callDF16b_param_0 __device__ __bf16 test_call( __bf16 in) { -// CHECK: ld.param.b16 %h{{.*}}, [_Z9test_callu6__bf16_param_0]; +// CHECK: ld.param.b16 %h{{.*}}, [_Z9test_callDF16b_param_0]; // CHECK: st.param.b16 [param0+0], %h2; // CHECK: .param .b32 retval0; // CHECK: call.uni (retval0), -// CHECK-NEXT: _Z8test_retu6__bf16, +// CHECK-NEXT: _Z8test_retDF16b, // CHECK-NEXT: ( // CHECK-NEXT: param0 // CHECK-NEXT ); Index: clang/test/Driver/fexcess-precision.c =================================================================== --- clang/test/Driver/fexcess-precision.c +++ clang/test/Driver/fexcess-precision.c @@ -62,9 +62,13 @@ // RUN: | FileCheck --check-prefix=CHECK-ERR-NONE %s // CHECK-FAST: "-ffloat16-excess-precision=fast" +// CHECK-FAST: "-fbfloat16-excess-precision=fast" // CHECK-STD: "-ffloat16-excess-precision=standard" +// CHECK-STD: "-fbfloat16-excess-precision=standard" // CHECK-NONE: "-ffloat16-excess-precision=none" +// CHECK-NONE: "-fbfloat16-excess-precision=none" // CHECK-ERR-NONE: unsupported argument 'none' to option '-fexcess-precision=' // CHECK: "-cc1" // CHECK-NOT: "-ffloat16-excess-precision=fast" +// CHECK-NOT: "-fbfloat16-excess-precision=fast" // CHECK-ERR-16: unsupported argument '16' to option '-fexcess-precision=' Index: clang/test/Sema/arm-bf16-forbidden-ops.c =================================================================== --- clang/test/Sema/arm-bf16-forbidden-ops.c +++ /dev/null @@ -1,72 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64 -target-feature +bf16 %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64 -target-feature -bf16 %s - -__bf16 test_cast_from_float(float in) { - return (__bf16)in; // expected-error {{cannot type-cast to __bf16}} -} - -__bf16 test_cast_from_float_literal(void) { - return (__bf16)1.0f; // expected-error {{cannot type-cast to __bf16}} -} - -__bf16 test_cast_from_int(int in) { - return (__bf16)in; // expected-error {{cannot type-cast to __bf16}} -} - -__bf16 test_cast_from_int_literal(void) { - return (__bf16)1; // expected-error {{cannot type-cast to __bf16}} -} - -__bf16 test_cast_bfloat(__bf16 in) { - return (__bf16)in; // this one should work -} - -float test_cast_to_float(__bf16 in) { - return (float)in; // expected-error {{cannot type-cast from __bf16}} -} - -int test_cast_to_int(__bf16 in) { - return (int)in; // expected-error {{cannot type-cast from __bf16}} -} - -__bf16 test_implicit_from_float(float in) { - return in; // expected-error {{returning 'float' from a function with incompatible result type '__bf16'}} -} - -__bf16 test_implicit_from_float_literal(void) { - return 1.0f; // expected-error {{returning 'float' from a function with incompatible result type '__bf16'}} -} - -__bf16 test_implicit_from_int(int in) { - return in; // expected-error {{returning 'int' from a function with incompatible result type '__bf16'}} -} - -__bf16 test_implicit_from_int_literal(void) { - return 1; // expected-error {{returning 'int' from a function with incompatible result type '__bf16'}} -} - -__bf16 test_implicit_bfloat(__bf16 in) { - return in; // this one should work -} - -float test_implicit_to_float(__bf16 in) { - return in; // expected-error {{returning '__bf16' from a function with incompatible result type 'float'}} -} - -int test_implicit_to_int(__bf16 in) { - return in; // expected-error {{returning '__bf16' from a function with incompatible result type 'int'}} -} - -__bf16 test_cond(__bf16 a, __bf16 b, _Bool which) { - // Conditional operator _should_ be supported, without nonsense - // complaints like 'types __bf16 and __bf16 are not compatible' - return which ? a : b; -} - -__bf16 test_cond_float(__bf16 a, __bf16 b, _Bool which) { - return which ? a : 1.0f; // expected-error {{incompatible operand types ('__bf16' and 'float')}} -} - -__bf16 test_cond_int(__bf16 a, __bf16 b, _Bool which) { - return which ? a : 1; // expected-error {{incompatible operand types ('__bf16' and 'int')}} -} Index: clang/test/Sema/arm-bf16-forbidden-ops.cpp =================================================================== --- clang/test/Sema/arm-bf16-forbidden-ops.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64 -target-feature +bf16 %s -// RUN: %clang_cc1 -fsyntax-only -verify -triple aarch64 -target-feature -bf16 %s - -__bf16 test_static_cast_from_float(float in) { - return static_cast<__bf16>(in); // expected-error {{static_cast from 'float' to '__bf16' is not allowed}} -} - -__bf16 test_static_cast_from_float_literal(void) { - return static_cast<__bf16>(1.0f); // expected-error {{static_cast from 'float' to '__bf16' is not allowed}} -} - -__bf16 test_static_cast_from_int(int in) { - return static_cast<__bf16>(in); // expected-error {{static_cast from 'int' to '__bf16' is not allowed}} -} - -__bf16 test_static_cast_from_int_literal(void) { - return static_cast<__bf16>(1); // expected-error {{static_cast from 'int' to '__bf16' is not allowed}} -} - -__bf16 test_static_cast_bfloat(__bf16 in) { - return static_cast<__bf16>(in); // this one should work -} - -float test_static_cast_to_float(__bf16 in) { - return static_cast(in); // expected-error {{static_cast from '__bf16' to 'float' is not allowed}} -} - -int test_static_cast_to_int(__bf16 in) { - return static_cast(in); // expected-error {{static_cast from '__bf16' to 'int' is not allowed}} -} - -__bf16 test_implicit_from_float(float in) { - return in; // expected-error {{cannot initialize return object of type '__bf16' with an lvalue of type 'float'}} -} - -__bf16 test_implicit_from_float_literal() { - return 1.0f; // expected-error {{cannot initialize return object of type '__bf16' with an rvalue of type 'float'}} -} - -__bf16 test_implicit_from_int(int in) { - return in; // expected-error {{cannot initialize return object of type '__bf16' with an lvalue of type 'int'}} -} - -__bf16 test_implicit_from_int_literal() { - return 1; // expected-error {{cannot initialize return object of type '__bf16' with an rvalue of type 'int'}} -} - -__bf16 test_implicit_bfloat(__bf16 in) { - return in; // this one should work -} - -float test_implicit_to_float(__bf16 in) { - return in; // expected-error {{cannot initialize return object of type 'float' with an lvalue of type '__bf16'}} -} - -int test_implicit_to_int(__bf16 in) { - return in; // expected-error {{cannot initialize return object of type 'int' with an lvalue of type '__bf16'}} -} - -__bf16 test_cond(__bf16 a, __bf16 b, bool which) { - // Conditional operator _should_ be supported, without nonsense - // complaints like 'types __bf16 and __bf16 are not compatible' - return which ? a : b; -} - -__bf16 test_cond_float(__bf16 a, __bf16 b, bool which) { - return which ? a : 1.0f; // expected-error {{incompatible operand types ('__bf16' and 'float')}} -} - -__bf16 test_cond_int(__bf16 a, __bf16 b, bool which) { - return which ? a : 1; // expected-error {{incompatible operand types ('__bf16' and 'int')}} -} Index: clang/test/Sema/arm-bfloat.cpp =================================================================== --- clang/test/Sema/arm-bfloat.cpp +++ clang/test/Sema/arm-bfloat.cpp @@ -1,38 +1,38 @@ // RUN: %clang_cc1 -fsyntax-only -verify=scalar,neon -std=c++11 \ // RUN: -triple aarch64-arm-none-eabi -target-cpu cortex-a75 \ -// RUN: -target-feature +bf16 -target-feature +neon %s +// RUN: -target-feature +bf16 -target-feature +neon -Wno-unused %s // RUN: %clang_cc1 -fsyntax-only -verify=scalar,neon -std=c++11 \ // RUN: -triple arm-arm-none-eabi -target-cpu cortex-a53 \ -// RUN: -target-feature +bf16 -target-feature +neon %s +// RUN: -target-feature +bf16 -target-feature +neon -Wno-unused %s // The types should be available under AArch64 even without the bf16 feature // RUN: %clang_cc1 -fsyntax-only -verify=scalar -DNONEON -std=c++11 \ // RUN: -triple aarch64-arm-none-eabi -target-cpu cortex-a75 \ -// RUN: -target-feature -bf16 -target-feature +neon %s +// RUN: -target-feature -bf16 -target-feature +neon -Wno-unused %s // REQUIRES: aarch64-registered-target || arm-registered-target void test(bool b) { __bf16 bf16; - bf16 + bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 - bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 * bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 / bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} + bf16 + bf16; + bf16 - bf16; + bf16 * bf16; + bf16 / bf16; __fp16 fp16; - bf16 + fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 + bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 - fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 - bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 * fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 * bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 / fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 / bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} + bf16 + fp16; + fp16 + bf16; + bf16 - fp16; + fp16 - bf16; + bf16 * fp16; + fp16 * bf16; + bf16 / fp16; + fp16 / bf16; bf16 = fp16; // scalar-error {{assigning to '__bf16' from incompatible type '__fp16'}} fp16 = bf16; // scalar-error {{assigning to '__fp16' from incompatible type '__bf16'}} - bf16 + (b ? fp16 : bf16); // scalar-error {{incompatible operand types ('__fp16' and '__bf16')}} + bf16 + (b ? fp16 : bf16); } #ifndef NONEON @@ -40,18 +40,18 @@ #include void test_vector(bfloat16x4_t a, bfloat16x4_t b, float16x4_t c) { - a + b; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} - a - b; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} - a * b; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} - a / b; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'bfloat16x4_t')}} - - a + c; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} - a - c; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} - a * c; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} - a / c; // neon-error {{invalid operands to binary expression ('bfloat16x4_t' (vector of 4 'bfloat16_t' values) and 'float16x4_t' (vector of 4 'float16_t' values))}} - c + b; // neon-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} - c - b; // neon-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} - c * b; // neon-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} - c / b; // neon-error {{invalid operands to binary expression ('float16x4_t' (vector of 4 'float16_t' values) and 'bfloat16x4_t' (vector of 4 'bfloat16_t' values))}} + a + b; + a - b; + a * b; + a / b; + + a + c; + a - c; + a * c; + a / c; + c + b; + c - b; + c * b; + c / b; } #endif \ No newline at end of file Index: clang/test/SemaCUDA/amdgpu-bf16.cu =================================================================== --- clang/test/SemaCUDA/amdgpu-bf16.cu +++ clang/test/SemaCUDA/amdgpu-bf16.cu @@ -1,13 +1,8 @@ // REQUIRES: amdgpu-registered-target // REQUIRES: x86-registered-target -// RUN: %clang_cc1 "-triple" "x86_64-unknown-linux-gnu" "-aux-triple" "amdgcn-amd-amdhsa"\ -// RUN: "-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn %s -// RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "amdgcn-amd-amdhsa"\ -// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn %s - // RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "r600-unknown-unknown"\ -// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=amdgcn,r600 %s +// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=r600 %s // AMDGCN has storage-only support for bf16. R600 does not support it should error out when // it's the main target. @@ -29,45 +24,8 @@ // r600-error@+1 2 {{__bf16 is not supported on this target}} __device__ void test(bool b, __bf16 *out, __bf16 in) { __bf16 bf16 = in; // r600-error {{__bf16 is not supported on this target}} - - bf16 + bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 - bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 * bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 / bf16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - - __fp16 fp16; - - bf16 + fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 + bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 - fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 - bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 * fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 * bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 / fp16; // amdgcn-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 / bf16; // amdgcn-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 = fp16; // amdgcn-error {{assigning to '__bf16' from incompatible type '__fp16'}} - fp16 = bf16; // amdgcn-error {{assigning to '__fp16' from incompatible type '__bf16'}} - bf16 + (b ? fp16 : bf16); // amdgcn-error {{incompatible operand types ('__fp16' and '__bf16')}} *out = bf16; - // amdgcn-error@+1 {{static_cast from '__bf16' to 'unsigned short' is not allowed}} - unsigned short u16bf16 = static_cast(bf16); - // amdgcn-error@+2 {{C-style cast from 'unsigned short' to '__bf16' is not allowed}} - // r600-error@+1 {{__bf16 is not supported on this target}} - bf16 = (__bf16)u16bf16; - - // amdgcn-error@+1 {{static_cast from '__bf16' to 'float' is not allowed}} - float f32bf16 = static_cast(bf16); - // amdgcn-error@+2 {{C-style cast from 'float' to '__bf16' is not allowed}} - // r600-error@+1 {{__bf16 is not supported on this target}} - bf16 = (__bf16)f32bf16; - - // amdgcn-error@+1 {{static_cast from '__bf16' to 'double' is not allowed}} - double f64bf16 = static_cast(bf16); - // amdgcn-error@+2 {{C-style cast from 'double' to '__bf16' is not allowed}} - // r600-error@+1 {{__bf16 is not supported on this target}} - bf16 = (__bf16)f64bf16; - // r600-error@+1 {{__bf16 is not supported on this target}} typedef __attribute__((ext_vector_type(2))) __bf16 bf16_x2; bf16_x2 vec2_a, vec2_b; Index: clang/test/SemaCUDA/bf16.cu =================================================================== --- clang/test/SemaCUDA/bf16.cu +++ clang/test/SemaCUDA/bf16.cu @@ -2,32 +2,32 @@ // REQUIRES: x86-registered-target // RUN: %clang_cc1 "-triple" "x86_64-unknown-linux-gnu" "-aux-triple" "nvptx64-nvidia-cuda" \ -// RUN: "-target-cpu" "x86-64" -fsyntax-only -verify=scalar %s +// RUN: "-target-cpu" "x86-64" -fsyntax-only -verify=scalar -Wno-unused %s // RUN: %clang_cc1 "-aux-triple" "x86_64-unknown-linux-gnu" "-triple" "nvptx64-nvidia-cuda" \ -// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=scalar %s +// RUN: -fcuda-is-device "-aux-target-cpu" "x86-64" -fsyntax-only -verify=scalar -Wno-unused %s #include "Inputs/cuda.h" __device__ void test(bool b, __bf16 *out, __bf16 in) { __bf16 bf16 = in; // No error on using the type itself. - bf16 + bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 - bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 * bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} - bf16 / bf16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__bf16')}} + bf16 + bf16; + bf16 - bf16; + bf16 * bf16; + bf16 / bf16; __fp16 fp16; - bf16 + fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 + bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 - fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 - bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 * fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 * bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} - bf16 / fp16; // scalar-error {{invalid operands to binary expression ('__bf16' and '__fp16')}} - fp16 / bf16; // scalar-error {{invalid operands to binary expression ('__fp16' and '__bf16')}} + bf16 + fp16; + fp16 + bf16; + bf16 - fp16; + fp16 - bf16; + bf16 * fp16; + fp16 * bf16; + bf16 / fp16; + fp16 / bf16; bf16 = fp16; // scalar-error {{assigning to '__bf16' from incompatible type '__fp16'}} fp16 = bf16; // scalar-error {{assigning to '__fp16' from incompatible type '__bf16'}} - bf16 + (b ? fp16 : bf16); // scalar-error {{incompatible operand types ('__fp16' and '__bf16')}} + bf16 + (b ? fp16 : bf16); *out = bf16; }