Index: clang/docs/UsersManual.rst =================================================================== --- clang/docs/UsersManual.rst +++ clang/docs/UsersManual.rst @@ -1309,6 +1309,7 @@ "ffp-contract", "{on, off, fast, fast-honor-pragmas}" "fdenormal-fp-math", "{IEEE, PreserveSign, PositiveZero}" "fdenormal-fp-math-fp32", "{IEEE, PreserveSign, PositiveZero}" + "fdenormal-fp-math-fp16", "{IEEE, PreserveSign, PositiveZero}" "fmath-errno", "{on, off}" "fhonor-nans", "{on, off}" "fhonor-infinities", "{on, off}" @@ -1331,6 +1332,7 @@ "contract", "on", "off", "fast" "denormal_fp_math", "IEEE", "IEEE", "PreserveSign" "denormal_fp32_math", "IEEE","IEEE", "PreserveSign" + "denormal_fp16_math", "IEEE","IEEE", "PreserveSign" "support_math_errno", "on", "on", "off" "no_honor_nans", "off", "off", "on" "no_honor_infinities", "off", "off", "on" Index: clang/include/clang/Basic/CodeGenOptions.h =================================================================== --- clang/include/clang/Basic/CodeGenOptions.h +++ clang/include/clang/Basic/CodeGenOptions.h @@ -196,6 +196,9 @@ /// The floating-point denormal mode to use, for float. llvm::DenormalMode FP32DenormalMode = llvm::DenormalMode::getIEEE(); + /// The floating-point denormal mode to use, for half float. + llvm::DenormalMode FP16DenormalMode = llvm::DenormalMode::getIEEE(); + /// The float precision limit to use, if non-empty. 
std::string LimitFloatPrecision; Index: clang/include/clang/Driver/Options.td =================================================================== --- clang/include/clang/Driver/Options.td +++ clang/include/clang/Driver/Options.td @@ -5507,6 +5507,8 @@ def fdenormal_fp_math_f32_EQ : Joined<["-"], "fdenormal-fp-math-f32=">, Group<f_Group>; +def fdenormal_fp_math_f16_EQ : Joined<["-"], "fdenormal-fp-math-f16=">, + Group<f_Group>; } // let Flags = [CC1Option, NoDriverOption] Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1848,6 +1848,10 @@ "denormal-fp-math-f32", CodeGenOpts.FP32DenormalMode.str()); } + if (CodeGenOpts.FP16DenormalMode != CodeGenOpts.FPDenormalMode) { + FuncAttrs.addAttribute("denormal-fp-math-f16", + CodeGenOpts.FP16DenormalMode.str()); + } if (LangOpts.getDefaultExceptionMode() == LangOptions::FPE_Ignore) FuncAttrs.addAttribute("no-trapping-math", "true"); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2763,9 +2763,12 @@ TC.getDefaultDenormalModeForType(Args, JA); const llvm::DenormalMode DefaultDenormalFP32Math = TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEsingle()); + const llvm::DenormalMode DefaultDenormalFP16Math = + TC.getDefaultDenormalModeForType(Args, JA, &llvm::APFloat::IEEEhalf()); llvm::DenormalMode DenormalFPMath = DefaultDenormalFPMath; llvm::DenormalMode DenormalFP32Math = DefaultDenormalFP32Math; + llvm::DenormalMode DenormalFP16Math = DefaultDenormalFP16Math; // CUDA and HIP don't rely on the frontend to pass an ffp-contract option. // If one wasn't given by the user, don't pass it here. StringRef FPContract; @@ -2802,6 +2805,8 @@ // -cl-denorms-are-zero. Should the target consider -fp-model interaction?
DenormalFP32Math = llvm::DenormalMode::getIEEE(); + DenormalFP16Math = llvm::DenormalMode::getIEEE(); + StringRef Val = A->getValue(); if (OFastEnabled && !Val.equals("fast")) { // Only -ffp-model=fast is compatible with OFast, ignore. @@ -2911,6 +2916,14 @@ } break; + case options::OPT_fdenormal_fp_math_f16_EQ: + DenormalFP16Math = llvm::parseDenormalFPAttribute(A->getValue()); + if (!DenormalFP16Math.isValid()) { + D.Diag(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + } + break; + // Validate and pass through -ffp-contract option. case options::OPT_ffp_contract: { StringRef Val = A->getValue(); @@ -2995,6 +3008,7 @@ // The target may have opted to flush by default, so force IEEE. DenormalFPMath = llvm::DenormalMode::getIEEE(); DenormalFP32Math = llvm::DenormalMode::getIEEE(); + DenormalFP16Math = llvm::DenormalMode::getIEEE(); break; case options::OPT_Ofast: @@ -3029,6 +3043,7 @@ // -fno_fast_math restores default denormal and fpcontract handling DenormalFPMath = DefaultDenormalFPMath; DenormalFP32Math = llvm::DenormalMode::getIEEE(); + DenormalFP16Math = llvm::DenormalMode::getIEEE(); if (!JA.isDeviceOffloading(Action::OFK_Cuda) && !JA.isOffloading(Action::OFK_HIP)) if (FPContract == "fast") { @@ -3046,6 +3061,7 @@ SignedZeros && TrappingMath && RoundingFPMath && !ApproxFunc && DenormalFPMath == llvm::DenormalMode::getIEEE() && DenormalFP32Math == llvm::DenormalMode::getIEEE() && + DenormalFP16Math == llvm::DenormalMode::getIEEE() && FPContract.equals("off")) // OK: Current Arg doesn't conflict with -ffp-model=strict ; @@ -3109,6 +3125,14 @@ CmdArgs.push_back(Args.MakeArgString(ArgStr.str())); } + // Add f16 specific denormal mode flag if it's different. 
+ if (DenormalFP16Math != DenormalFPMath) { + llvm::SmallString<64> DenormFlag; + llvm::raw_svector_ostream ArgStr(DenormFlag); + ArgStr << "-fdenormal-fp-math-f16=" << DenormalFP16Math; + CmdArgs.push_back(Args.MakeArgString(ArgStr.str())); + } + if (!FPContract.empty()) CmdArgs.push_back(Args.MakeArgString("-ffp-contract=" + FPContract)); Index: clang/lib/Frontend/CompilerInvocation.cpp =================================================================== --- clang/lib/Frontend/CompilerInvocation.cpp +++ clang/lib/Frontend/CompilerInvocation.cpp @@ -1509,6 +1509,11 @@ GenerateArg(Args, OPT_fdenormal_fp_math_f32_EQ, Opts.FP32DenormalMode.str(), SA); + if ((Opts.FPDenormalMode != Opts.FP16DenormalMode) || + (Opts.FP16DenormalMode != llvm::DenormalMode::getIEEE())) + GenerateArg(Args, OPT_fdenormal_fp_math_f16_EQ, Opts.FP16DenormalMode.str(), + SA); + if (Opts.StructReturnConvention == CodeGenOptions::SRCK_OnStack) { OptSpecifier Opt = T.isPPC32() ? OPT_maix_struct_return : OPT_fpcc_struct_return; @@ -1859,6 +1864,7 @@ StringRef Val = A->getValue(); Opts.FPDenormalMode = llvm::parseDenormalFPAttribute(Val); Opts.FP32DenormalMode = Opts.FPDenormalMode; + Opts.FP16DenormalMode = Opts.FPDenormalMode; if (!Opts.FPDenormalMode.isValid()) Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } @@ -1870,6 +1876,13 @@ Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; } + if (Arg *A = Args.getLastArg(OPT_fdenormal_fp_math_f16_EQ)) { + StringRef Val = A->getValue(); + Opts.FP16DenormalMode = llvm::parseDenormalFPAttribute(Val); + if (!Opts.FP16DenormalMode.isValid()) + Diags.Report(diag::err_drv_invalid_value) << A->getAsString(Args) << Val; + } + // X86_32 has -fppc-struct-return and -freg-struct-return. // PPC32 has -maix-struct-return and -msvr4-struct-return. 
if (Arg *A = Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -2171,6 +2171,13 @@ attempt is made to diagnose unsupported uses. Currently this attribute is respected by the AMDGPU and NVPTX backends. +``"denormal-fp-math-f16"`` + Same as ``"denormal-fp-math"``, but only controls the behavior of + the 16-bit float type (or vectors of 16-bit floats). If both are + present, this overrides ``"denormal-fp-math"``. Not all targets + support separately setting the denormal mode per type, and no + attempt is made to diagnose unsupported uses. + ``"thunk"`` This attribute indicates that the function will delegate to some other function with a tail call. The prototype of a thunk should not be used for Index: llvm/include/llvm/CodeGen/CommandFlags.h =================================================================== --- llvm/include/llvm/CodeGen/CommandFlags.h +++ llvm/include/llvm/CodeGen/CommandFlags.h @@ -66,6 +66,7 @@ DenormalMode::DenormalModeKind getDenormalFPMath(); DenormalMode::DenormalModeKind getDenormalFP32Math(); +DenormalMode::DenormalModeKind getDenormalFP16Math(); bool getEnableHonorSignDependentRoundingFPMath(); Index: llvm/include/llvm/Target/TargetOptions.h =================================================================== --- llvm/include/llvm/Target/TargetOptions.h +++ llvm/include/llvm/Target/TargetOptions.h @@ -416,6 +416,10 @@ /// float. DenormalMode FP32DenormalMode; + /// Flushing mode to assume in default FP environment, for half float/vector + /// of half float.
+ DenormalMode FP16DenormalMode; + public: void setFPDenormalMode(DenormalMode Mode) { FPDenormalMode = Mode; @@ -425,6 +429,8 @@ FP32DenormalMode = Mode; } + void setFP16DenormalMode(DenormalMode Mode) { FP16DenormalMode = Mode; } + DenormalMode getRawFPDenormalMode() const { return FPDenormalMode; } @@ -433,6 +439,8 @@ return FP32DenormalMode; } + DenormalMode getRawFP16DenormalMode() const { return FP16DenormalMode; } + DenormalMode getDenormalMode(const fltSemantics &FPType) const; /// What exception model to use Index: llvm/lib/CodeGen/CommandFlags.cpp =================================================================== --- llvm/lib/CodeGen/CommandFlags.cpp +++ llvm/lib/CodeGen/CommandFlags.cpp @@ -68,6 +68,7 @@ CGOPT(bool, EnableAIXExtendedAltivecABI) CGOPT(DenormalMode::DenormalModeKind, DenormalFPMath) CGOPT(DenormalMode::DenormalModeKind, DenormalFP32Math) +CGOPT(DenormalMode::DenormalModeKind, DenormalFP16Math) CGOPT(bool, EnableHonorSignDependentRoundingFPMath) CGOPT(FloatABI::ABIType, FloatABIForCalls) CGOPT(FPOpFusion::FPOpFusionMode, FuseFPOps) @@ -263,6 +264,13 @@ DenormFlagEnumOptions); CGBINDOPT(DenormalFP32Math); + static cl::opt<DenormalMode::DenormalModeKind> DenormalFP16Math( + "denormal-fp-math-f16", + cl::desc("Select which denormal numbers the code is permitted to require " + "for half float"), + cl::init(DenormalMode::Invalid), DenormFlagEnumOptions); + CGBINDOPT(DenormalFP16Math); + static cl::opt<bool> EnableHonorSignDependentRoundingFPMath( "enable-sign-dependent-rounding-fp-math", cl::Hidden, cl::desc("Force codegen to assume rounding mode can change dynamically"), @@ -693,6 +701,15 @@ DenormalMode(DenormKind, DenormKind).str()); } + if (DenormalFP16MathView->getNumOccurrences() > 0 && + !F.hasFnAttribute("denormal-fp-math-f16")) { + // FIXME: Command line flag should expose separate input/output modes.
+ DenormalMode::DenormalModeKind DenormKind = getDenormalFP16Math(); + + NewAttrs.addAttribute("denormal-fp-math-f16", + DenormalMode(DenormKind, DenormKind).str()); + } + if (TrapFuncNameView->getNumOccurrences() > 0) for (auto &B : F) for (auto &I : B) Index: llvm/lib/IR/Function.cpp =================================================================== --- llvm/lib/IR/Function.cpp +++ llvm/lib/IR/Function.cpp @@ -675,10 +675,15 @@ StringRef Val = Attr.getValueAsString(); if (!Val.empty()) return parseDenormalFPAttribute(Val); - - // If the f32 variant of the attribute isn't specified, try to use the - // generic one. } + if (&FPType == &APFloat::IEEEhalf()) { + Attribute Attr = getFnAttribute("denormal-fp-math-f16"); + StringRef Val = Attr.getValueAsString(); + if (!Val.empty()) + return parseDenormalFPAttribute(Val); + } + // If the f32 or f16 variant of the attribute isn't specified, try to use + // the generic one. Attribute Attr = getFnAttribute("denormal-fp-math"); return parseDenormalFPAttribute(Attr.getValueAsString()); Index: llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll =================================================================== --- llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll +++ llvm/test/Transforms/InstSimplify/constant-fold-fp-denormal.ll @@ -12,6 +12,73 @@ ; normal operand (a number plus zero is the same number). 
; ============================================================================ ; +define half @test_half_fadd_ieee() #0 { +; CHECK-LABEL: @test_half_fadd_ieee( +; CHECK-NEXT: ret half 0xH8200 +; +; default ieee mode leaves result as a denormal + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_pzero_out() #1 { +; CHECK-LABEL: @test_half_fadd_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal result is flushed to positive zero + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_psign_out() #2 { +; CHECK-LABEL: @test_half_fadd_psign_out( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal result is flushed to sign preserved zero + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_pzero_in() #3 { +; CHECK-LABEL: @test_half_fadd_pzero_in( +; CHECK-NEXT: ret half 0xH8400 +; +; denormal operand is treated as zero +; normal operand added to zero results in the same operand as a result + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_psign_in() #4 { +; CHECK-LABEL: @test_half_fadd_psign_in( +; CHECK-NEXT: ret half 0xH8400 +; +; denormal operand is treated as zero +; normal operand added to zero results in the same operand as a result + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_fadd_pzero_f16_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; f16 only attribute should flush half float output +; same as pzero_out above + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + +define half @test_half_fadd_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_fadd_pzero_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; +; f32 only attribute should not flush half float output +; default ieee mode leaves result as a denormal + %result = fadd half 0xH8400, 0xH0200 + ret half %result +} + define float 
@test_float_fadd_ieee() #0 { ; CHECK-LABEL: @test_float_fadd_ieee( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -59,12 +126,22 @@ ret float %result } -define float @test_float_fadd_pzero_f32_out() #5 { -; CHECK-LABEL: @test_float_fadd_pzero_f32_out( +define float @test_float_fadd_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_fadd_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; +; f16 only attribute should not flush float output +; default ieee mode leaves result as a denormal + %result = fadd float 0xB810000000000000, 0x3800000000000000 + ret float %result +} + +define float @test_float_fadd_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_fadd_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0.000000e+00 ; ; f32 only attribute should flush float output -; default ieee mode leaves result as a denormal +; same as pzero_out above %result = fadd float 0xB810000000000000, 0x3800000000000000 ret float %result } @@ -116,11 +193,21 @@ ret double %result } -define double @test_double_fadd_f32_ieee() #5 { -; CHECK-LABEL: @test_double_fadd_f32_ieee( +define double @test_double_fadd_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_fadd_f16_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; -; f32 only attribute should not flush doubles +; f16 only attribute should not flush double output +; default ieee mode leaves result as a denormal + %result = fadd double 0x8010000000000000, 0x0008000000000000 + ret double %result +} + +define double @test_double_fadd_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_fadd_f32_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; +; f32 only attribute should not flush double output ; default ieee mode leaves result as a denormal %result = fadd double 0x8010000000000000, 0x0008000000000000 ret double %result @@ -134,6 +221,73 @@ ; negated normal operand (zero minus the original operand). 
; ============================================================================ ; +define half @test_half_fsub_ieee() #0 { +; CHECK-LABEL: @test_half_fsub_ieee( +; CHECK-NEXT: ret half 0xH8200 +; +; default ieee mode leaves result as a denormal + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_pzero_out() #1 { +; CHECK-LABEL: @test_half_fsub_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal result is flushed to positive zero + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_psign_out() #2 { +; CHECK-LABEL: @test_half_fsub_psign_out( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal result is flushed to sign preserved zero + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_pzero_in() #3 { +; CHECK-LABEL: @test_half_fsub_pzero_in( +; CHECK-NEXT: ret half 0xH8400 +; +; denormal operand is treated as zero +; normal operand subtracted from zero produces the same operand, negated + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_psign_in() #4 { +; CHECK-LABEL: @test_half_fsub_psign_in( +; CHECK-NEXT: ret half 0xH8400 +; +; denormal operand is treated as zero +; normal operand subtracted from zero produces the same operand, negated + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_fsub_pzero_f16_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; f16 only attribute should flush half float output +; same as pzero_out above + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + +define half @test_half_fsub_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_fsub_pzero_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; +; f32 only attribute should not flush half float output +; default ieee mode leaves result as a denormal + %result = fsub half 0xH0200, 0xH0400 + ret half %result +} + define float 
@test_float_fsub_ieee() #0 { ; CHECK-LABEL: @test_float_fsub_ieee( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -181,8 +335,18 @@ ret float %result } -define float @test_float_fsub_pzero_f32_out() #5 { -; CHECK-LABEL: @test_float_fsub_pzero_f32_out( +define float @test_float_fsub_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_fsub_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; +; f16 only attribute should not flush float output +; default ieee mode leaves result as a denormal + %result = fsub float 0x3800000000000000, 0x3810000000000000 + ret float %result +} + +define float @test_float_fsub_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_fsub_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0.000000e+00 ; ; f32 only attribute should flush float output @@ -238,11 +402,21 @@ ret double %result } -define double @test_double_fsub_f32_ieee() #5 { -; CHECK-LABEL: @test_double_fsub_f32_ieee( +define double @test_double_fsub_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_fsub_f16_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; -; f32 only attribute should not flush doubles +; f16 only attribute should not flush double output +; default ieee mode leaves result as a denormal + %result = fsub double 0x0008000000000000, 0x0010000000000000 + ret double %result +} + +define double @test_double_fsub_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_fsub_f32_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; +; f32 only attribute should not flush double output ; default ieee mode leaves result as a denormal %result = fsub double 0x0008000000000000, 0x0010000000000000 ret double %result @@ -258,6 +432,73 @@ ; treated as zero, the result should also be zero. 
; ============================================================================ ; +define half @test_half_fmul_ieee() #0 { +; CHECK-LABEL: @test_half_fmul_ieee( +; CHECK-NEXT: ret half 0xH8200 +; +; default ieee mode leaves result as a denormal + %result = fmul half 0xH0400, 0xHB800 + ret half %result +} + +define half @test_half_fmul_pzero_out() #1 { +; CHECK-LABEL: @test_half_fmul_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal result is flushed to positive zero + %result = fmul half 0xH0400, 0xHB800 + ret half %result +} + +define half @test_half_fmul_psign_out() #2 { +; CHECK-LABEL: @test_half_fmul_psign_out( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal result is flushed to sign preserved zero + %result = fmul half 0xH0400, 0xHB800 + ret half %result +} + +define half @test_half_fmul_pzero_in() #3 { +; CHECK-LABEL: @test_half_fmul_pzero_in( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal operand is treated as positive zero +; anything multiplied by zero gives a zero result + %result = fmul half 0xH8200, 0xH4000 + ret half %result +} + +define half @test_half_fmul_psign_in() #4 { +; CHECK-LABEL: @test_half_fmul_psign_in( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal operand is treated as signed zero +; anything multiplied by zero gives a zero result + %result = fmul half 0xH8200, 0xH4000 + ret half %result +} + +define half @test_half_fmul_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_fmul_pzero_f16_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; f16 only attribute should flush half float output +; same as pzero_out above + %result = fmul half 0xH0400, 0xHB800 + ret half %result +} + +define half @test_half_fmul_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_fmul_pzero_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; +; f32 only attribute should not flush half float output +; default ieee mode leaves result as a denormal + %result = fmul half 0xH0400, 0xHB800 + ret half %result +} + define float @test_float_fmul_ieee() #0 { ; 
CHECK-LABEL: @test_float_fmul_ieee( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -305,8 +546,18 @@ ret float %result } -define float @test_float_fmul_pzero_f32_out() #1 { -; CHECK-LABEL: @test_float_fmul_pzero_f32_out( +define float @test_float_fmul_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_fmul_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; +; f16 only attribute should not flush float output +; default ieee mode leaves result as a denormal + %result = fmul float 0x3810000000000000, -5.000000e-01 + ret float %result +} + +define float @test_float_fmul_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_fmul_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0.000000e+00 ; ; f32 only attribute should flush float output @@ -362,11 +613,21 @@ ret double %result } -define double @test_double_fmul_f32_ieee() #5 { -; CHECK-LABEL: @test_double_fmul_f32_ieee( +define double @test_double_fmul_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_fmul_f16_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; +; f16 only attribute should not flush double output +; default ieee mode leaves result as a denormal + %result = fmul double 0x0010000000000000, -5.000000e-01 + ret double %result +} + +define double @test_double_fmul_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_fmul_f32_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; -; f32 only attribute should not flush doubles +; f32 only attribute should not flush double output ; default ieee mode leaves result as a denormal %result = fmul double 0x0010000000000000, -5.000000e-01 ret double %result @@ -382,6 +643,73 @@ ; treated as zero, the result should also be zero. 
; ============================================================================ ; +define half @test_half_fdiv_ieee() #0 { +; CHECK-LABEL: @test_half_fdiv_ieee( +; CHECK-NEXT: ret half 0xH8200 +; +; default ieee mode leaves result as a denormal + %result = fdiv half 0xH0400, 0xHC000 + ret half %result +} + +define half @test_half_fdiv_pzero_out() #1 { +; CHECK-LABEL: @test_half_fdiv_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal result is flushed to positive zero + %result = fdiv half 0xH0400, 0xHC000 + ret half %result +} + +define half @test_half_fdiv_psign_out() #2 { +; CHECK-LABEL: @test_half_fdiv_psign_out( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal result is flushed to sign preserved zero + %result = fdiv half 0xH0400, 0xHC000 + ret half %result +} + +define half @test_half_fdiv_pzero_in() #3 { +; CHECK-LABEL: @test_half_fdiv_pzero_in( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal operand is treated as zero +; zero divided by anything gives a zero result + %result = fdiv half 0xH8200, 0xH3800 + ret half %result +} + +define half @test_half_fdiv_psign_in() #4 { +; CHECK-LABEL: @test_half_fdiv_psign_in( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal operand is treated as zero +; zero divided by anything gives a zero result + %result = fdiv half 0xH8200, 0xH3800 + ret half %result +} + +define half @test_half_fdiv_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_fdiv_pzero_f16_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; f16 only attribute should flush half float output +; same as pzero_out above + %result = fdiv half 0xH0400, 0xHC000 + ret half %result +} + +define half @test_half_fdiv_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_fdiv_pzero_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; +; f32 only attribute should not flush half float output +; default ieee mode leaves result as a denormal + %result = fdiv half 0xH0400, 0xHC000 + ret half %result +} + define float @test_float_fdiv_ieee() #0 { ; CHECK-LABEL:
@test_float_fdiv_ieee( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -429,8 +757,18 @@ ret float %result } -define float @test_float_fdiv_pzero_f32_out() #1 { -; CHECK-LABEL: @test_float_fdiv_pzero_f32_out( +define float @test_float_fdiv_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_fdiv_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; +; f16 only attribute should not flush float output +; default ieee mode leaves result as a denormal + %result = fdiv float 0x3810000000000000, -2.000000e-00 + ret float %result +} + +define float @test_float_fdiv_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_fdiv_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0.000000e+00 ; ; f32 only attribute should flush float output @@ -486,11 +824,21 @@ ret double %result } -define double @test_double_fdiv_f32_ieee() #5 { -; CHECK-LABEL: @test_double_fdiv_f32_ieee( +define double @test_double_fdiv_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_fdiv_f16_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; +; f16 only attribute should not flush double output +; default ieee mode leaves result as a denormal + %result = fdiv double 0x0010000000000000, -2.000000e-00 + ret double %result +} + +define double @test_double_fdiv_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_fdiv_f32_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; -; f32 only attribute should not flush doubles +; f32 only attribute should not flush double output ; default ieee mode leaves result as a denormal %result = fdiv double 0x0010000000000000, -2.000000e-00 ret double %result @@ -506,6 +854,82 @@ ; the result also becomes zero.
; ============================================================================ ; +define half @test_half_frem_ieee_out() #0 { +; CHECK-LABEL: @test_half_frem_ieee_out( +; CHECK-NEXT: ret half 0xH8200 +; +; default ieee mode leaves result as a denormal + %result = frem half 0xH8600, 0xH0400 + ret half %result +} + +define half @test_half_frem_pzero_out() #1 { +; CHECK-LABEL: @test_half_frem_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal result is flushed to positive zero + %result = frem half 0xH8600, 0xH0400 + ret half %result +} + +define half @test_half_frem_psign_out() #2 { +; CHECK-LABEL: @test_half_frem_psign_out( +; CHECK-NEXT: ret half 0xH8000 +; +; denormal result is flushed to sign preserved zero + %result = frem half 0xH8600, 0xH0400 + ret half %result +} + +define half @test_half_frem_ieee_in() #0 { +; CHECK-LABEL: @test_half_frem_ieee_in( +; CHECK-NEXT: ret half 0xH0200 +; +; default ieee mode leaves result same as input + %result = frem half 0xH0200, 0xH4000 + ret half %result +} + +define half @test_half_frem_pzero_in() #3 { +; CHECK-LABEL: @test_half_frem_pzero_in( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal operand is treated as zero +; remainder is now zero + %result = frem half 0xH0200, 0xH4000 + ret half %result +} + +define half @test_half_frem_psign_in() #4 { +; CHECK-LABEL: @test_half_frem_psign_in( +; CHECK-NEXT: ret half 0xH0000 +; +; denormal operand is treated as zero +; remainder is now zero + %result = frem half 0xH0200, 0xH4000 + ret half %result +} + +define half @test_half_frem_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_frem_pzero_f16_pzero_out( +; CHECK-NEXT: ret half 0xH0000 +; +; f16 only attribute should flush half float output +; same as pzero_out above + %result = frem half 0xH8600, 0xH0400 + ret half %result +} + +define half @test_half_frem_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_frem_pzero_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; +; f32 only attribute should not flush half float 
output +; default ieee mode leaves result as a denormal + %result = frem half 0xH8600, 0xH0400 + ret half %result +} + define float @test_float_frem_ieee_out() #0 { ; CHECK-LABEL: @test_float_frem_ieee_out( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -562,8 +986,18 @@ ret float %result } -define float @test_float_frem_pzero_f32_out() #1 { -; CHECK-LABEL: @test_float_frem_pzero_f32_out( +define float @test_float_frem_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_frem_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; +; f16 only attribute should not flush float output +; default ieee mode leaves result as a denormal + %result = frem float 0xB818000000000000, 0x3810000000000000 + ret float %result +} + +define float @test_float_frem_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_frem_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0.000000e+00 ; ; f32 only attribute should flush float output @@ -628,11 +1062,21 @@ ret double %result } -define double @test_double_frem_f32_ieee() #5 { -; CHECK-LABEL: @test_double_frem_f32_ieee( +define double @test_double_frem_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_frem_f16_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; -; f32 only attribute should not flush doubles +; f16 only attribute should not flush double output +; default ieee mode leaves result as a denormal + %result = frem double 0x8018000000000000, 0x0010000000000000 + ret double %result +} + +define double @test_double_frem_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_frem_f32_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; +; f32 only attribute should not flush double output ; default ieee mode leaves result as a denormal %result = frem double 0x8018000000000000, 0x0010000000000000 ret double %result @@ -644,6 +1088,62 @@ ; these tests confirm fneg results are unchanged ; ============================================================================ ; +define half @test_half_fneg_ieee() #0 { +; 
CHECK-LABEL: @test_half_fneg_ieee( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_pzero_out() #1 { +; CHECK-LABEL: @test_half_fneg_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_psign_out() #2 { +; CHECK-LABEL: @test_half_fneg_psign_out( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_pzero_in() #3 { +; CHECK-LABEL: @test_half_fneg_pzero_in( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_psign_in() #4 { +; CHECK-LABEL: @test_half_fneg_psign_in( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_f16_pzero_out() #9 { +; CHECK-LABEL: @test_half_fneg_f16_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + +define half @test_half_fneg_f32_pzero_out() #5 { +; CHECK-LABEL: @test_half_fneg_f32_pzero_out( +; CHECK-NEXT: ret half 0xH8200 +; + %result = fneg half 0xH0200 + ret half %result +} + define float @test_float_fneg_ieee() #0 { ; CHECK-LABEL: @test_float_fneg_ieee( ; CHECK-NEXT: ret float 0xB800000000000000 @@ -652,7 +1152,7 @@ ret float %result } -define float @test_float_fneg_pzero_out() #0 { +define float @test_float_fneg_pzero_out() #1 { ; CHECK-LABEL: @test_float_fneg_pzero_out( ; CHECK-NEXT: ret float 0xB800000000000000 ; @@ -660,7 +1160,7 @@ ret float %result } -define float @test_float_fneg_psign_out() #0 { +define float @test_float_fneg_psign_out() #2 { ; CHECK-LABEL: @test_float_fneg_psign_out( ; CHECK-NEXT: ret float 0xB800000000000000 ; @@ -668,7 +1168,7 @@ ret float %result } -define float @test_float_fneg_pzero_in() #0 { +define float @test_float_fneg_pzero_in() #3 { ; CHECK-LABEL: @test_float_fneg_pzero_in( ; CHECK-NEXT: ret float 0xB800000000000000 ; 
@@ -676,7 +1176,7 @@ ret float %result } -define float @test_float_fneg_psign_in() #0 { +define float @test_float_fneg_psign_in() #4 { ; CHECK-LABEL: @test_float_fneg_psign_in( ; CHECK-NEXT: ret float 0xB800000000000000 ; @@ -684,8 +1184,16 @@ ret float %result } -define float @test_float_fneg_pzero_f32_out() #5 { -; CHECK-LABEL: @test_float_fneg_pzero_f32_out( +define float @test_float_fneg_pzero_f16_pzero_out() #9 { +; CHECK-LABEL: @test_float_fneg_pzero_f16_pzero_out( +; CHECK-NEXT: ret float 0xB800000000000000 +; + %result = fneg float 0x3800000000000000 + ret float %result +} + +define float @test_float_fneg_pzero_f32_pzero_out() #5 { +; CHECK-LABEL: @test_float_fneg_pzero_f32_pzero_out( ; CHECK-NEXT: ret float 0xB800000000000000 ; %result = fneg float 0x3800000000000000 @@ -732,14 +1240,26 @@ ret double %result } -define double @test_double_fneg_f32_ieee() #5 { -; CHECK-LABEL: @test_double_fneg_f32_ieee( +define double @test_double_fneg_f16_pzero_out() #9 { +; CHECK-LABEL: @test_double_fneg_f16_pzero_out( +; CHECK-NEXT: ret double 0x8008000000000000 +; + %result = fneg double 0x0008000000000000 + ret double %result +} + +define double @test_double_fneg_f32_pzero_out() #5 { +; CHECK-LABEL: @test_double_fneg_f32_pzero_out( ; CHECK-NEXT: ret double 0x8008000000000000 ; %result = fneg double 0x0008000000000000 ret double %result } +; ============================================================================ ; +; fcmp tests +; ============================================================================ ; + define i1 @fcmp_double_ieee_in_ieee_out() #0 { ; CHECK-LABEL: @fcmp_double_ieee_in_ieee_out( ; CHECK-NEXT: entry: @@ -1100,6 +1620,130 @@ ret i1 %cmp } +; check all types don't flush when set to ieee +define i1 @fcmp_half_ieee_in() #0 { +; CHECK-LABEL: @fcmp_half_ieee_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une half 0xH0200, 0xH000 + ret i1 %cmp +} + +define i1 @fcmp_float_ieee_in() #0 { +; CHECK-LABEL: 
@fcmp_float_ieee_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une float 0x3800000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_ieee_in() #0 { +; CHECK-LABEL: @fcmp_double_ieee_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +; check all types do flush inputs when set to positive zero +define i1 @fcmp_half_pzero_in() #3 { +; CHECK-LABEL: @fcmp_half_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp une half 0xH0200, 0xH000 + ret i1 %cmp +} + +define i1 @fcmp_float_pzero_in() #3 { +; CHECK-LABEL: @fcmp_float_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp une float 0x3800000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_pzero_in() #3 { +; CHECK-LABEL: @fcmp_double_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +; check only f32 flushes when f32 attribute is set +define i1 @fcmp_half_f32_pzero_in() #10 { +; CHECK-LABEL: @fcmp_half_f32_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une half 0xH0200, 0xH000 + ret i1 %cmp +} + +define i1 @fcmp_float_f32_pzero_in() #10 { +; CHECK-LABEL: @fcmp_float_f32_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp une float 0x3800000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_f32_pzero_in() #10 { +; CHECK-LABEL: @fcmp_double_f32_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + +; check only f16 flushes when f16 attribute is set +define i1 @fcmp_half_f16_pzero_in() #11 { +; CHECK-LABEL: @fcmp_half_f16_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 false +; +entry: + %cmp = fcmp une half 0xH0200, 0xH000 + ret i1 %cmp +} + +define i1 @fcmp_float_f16_pzero_in() 
#11 { +; CHECK-LABEL: @fcmp_float_f16_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une float 0x3800000000000000, 0x0 + ret i1 %cmp +} + +define i1 @fcmp_double_f16_pzero_in() #11 { +; CHECK-LABEL: @fcmp_double_f16_pzero_in( +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i1 true +; +entry: + %cmp = fcmp une double 0x0008000000000000, 0x0 + ret i1 %cmp +} + attributes #0 = { nounwind "denormal-fp-math"="ieee,ieee" } attributes #1 = { nounwind "denormal-fp-math"="positive-zero,ieee" } attributes #2 = { nounwind "denormal-fp-math"="preserve-sign,ieee" } @@ -1109,3 +1753,6 @@ attributes #6 = { nounwind "denormal-fp-math"="positive-zero,positive-zero" } attributes #7 = { nounwind "denormal-fp-math"="preserve-sign,preserve-sign" } attributes #8 = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="positive-zero,positive-zero" } +attributes #9 = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f16"="positive-zero,ieee" } +attributes #10 = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f32"="ieee,positive-zero" } +attributes #11 = { nounwind "denormal-fp-math"="ieee,ieee" "denormal-fp-math-f16"="ieee,positive-zero" }