Index: clang/lib/CodeGen/CGCall.cpp =================================================================== --- clang/lib/CodeGen/CGCall.cpp +++ clang/lib/CodeGen/CGCall.cpp @@ -1748,11 +1748,10 @@ if (CodeGenOpts.NullPointerIsValid) FuncAttrs.addAttribute("null-pointer-is-valid", "true"); - // TODO: Omit attribute when the default is IEEE. - if (CodeGenOpts.FPDenormalMode.isValid()) + if (CodeGenOpts.FPDenormalMode != llvm::DenormalMode::getIEEE()) FuncAttrs.addAttribute("denormal-fp-math", CodeGenOpts.FPDenormalMode.str()); - if (CodeGenOpts.FP32DenormalMode.isValid()) { + if (CodeGenOpts.FP32DenormalMode != CodeGenOpts.FPDenormalMode) { FuncAttrs.addAttribute( "denormal-fp-math-f32", CodeGenOpts.FP32DenormalMode.str()); Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -2825,8 +2825,8 @@ } else if (TrappingMathPresent) CmdArgs.push_back("-fno-trapping-math"); - // TODO: Omit flag for the default IEEE instead - if (DenormalFPMath.isValid()) { + // The default is IEEE. 
+ if (DenormalFPMath != llvm::DenormalMode::getIEEE()) { llvm::SmallString<64> DenormFlag; llvm::raw_svector_ostream ArgStr(DenormFlag); ArgStr << "-fdenormal-fp-math=" << DenormalFPMath; Index: clang/test/CodeGen/denormalfpmode.c =================================================================== --- clang/test/CodeGen/denormalfpmode.c +++ clang/test/CodeGen/denormalfpmode.c @@ -3,7 +3,9 @@ // RUN: %clang_cc1 -S -fdenormal-fp-math=positive-zero %s -emit-llvm -o - | FileCheck %s --check-prefix=CHECK-PZ // CHECK-LABEL: main -// CHECK-IEEE: attributes #0 = {{.*}}"denormal-fp-math"="ieee,ieee"{{.*}} + +// ieee,ieee is the default, so the attribute is omitted. +// CHECK-IEEE-NOT:"denormal-fp-math" // CHECK-PS: attributes #0 = {{.*}}"denormal-fp-math"="preserve-sign,preserve-sign"{{.*}} // CHECK-PZ: attributes #0 = {{.*}}"denormal-fp-math"="positive-zero,positive-zero"{{.*}} Index: clang/test/CodeGenCUDA/flush-denormals.cu =================================================================== --- clang/test/CodeGenCUDA/flush-denormals.cu +++ clang/test/CodeGenCUDA/flush-denormals.cu @@ -39,7 +39,7 @@ extern "C" __device__ void foo() {} // FTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="preserve-sign,preserve-sign" -// NOFTZ: attributes #0 = {{.*}} "denormal-fp-math-f32"="ieee,ieee" +// NOFTZ-NOT: "denormal-fp-math-f32" // AMDNOFTZ: attributes #0 = {{.*}}+fp32-denormals{{.*}}+fp64-fp16-denormals // AMDFTZ: attributes #0 = {{.*}}+fp64-fp16-denormals{{.*}}-fp32-denormals Index: clang/test/CodeGenCUDA/propagate-metadata.cu =================================================================== --- clang/test/CodeGenCUDA/propagate-metadata.cu +++ clang/test/CodeGenCUDA/propagate-metadata.cu @@ -75,11 +75,11 @@ // CHECK-SAME: convergent // CHECK-NOT: norecurse -// FTZ-SAME: "denormal-fp-math"="ieee,ieee" -// NOFTZ-SAME: "denormal-fp-math"="ieee,ieee" +// FTZ-NOT: "denormal-fp-math" +// NOFTZ-NOT: "denormal-fp-math" // FTZ-SAME: "denormal-fp-math-f32"="preserve-sign,preserve-sign" 
-// NOFTZ-SAME: "denormal-fp-math-f32"="ieee,ieee" +// NOFTZ-NOT: "denormal-fp-math-f32" // CHECK-SAME: "no-trapping-math"="true" Index: clang/test/Driver/denormal-fp-math.c =================================================================== --- clang/test/Driver/denormal-fp-math.c +++ clang/test/Driver/denormal-fp-math.c @@ -8,8 +8,8 @@ // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,ieee -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID2 %s // RUN: not %clang -target arm-unknown-linux-gnu -c %s -fdenormal-fp-math=foo,foo -v 2>&1 | FileCheck -check-prefix=CHECK-INVALID3 %s -// TODO: ieee is the implied default, and the flag is not passed. -// CHECK-IEEE: "-fdenormal-fp-math=ieee,ieee" +// IEEE is the implied default, and the flag is not passed. +// CHECK-IEEE-NOT: -fdenormal-fp-math= // CHECK-PS: "-fdenormal-fp-math=preserve-sign,preserve-sign" // CHECK-PZ: "-fdenormal-fp-math=positive-zero,positive-zero" // CHECK-NO-UNSAFE-NOT: "-fdenormal-fp-math=ieee" Index: llvm/lib/CodeGen/MachineFunction.cpp =================================================================== --- llvm/lib/CodeGen/MachineFunction.cpp +++ llvm/lib/CodeGen/MachineFunction.cpp @@ -284,15 +284,7 @@ // TODO: Should probably avoid the connection to the IR and store directly // in the MachineFunction. Attribute Attr = F.getFnAttribute("denormal-fp-math"); - - // FIXME: This should assume IEEE behavior on an unspecified - // attribute. However, the one current user incorrectly assumes a non-IEEE - // target by default. 
- StringRef Val = Attr.getValueAsString(); - if (Val.empty()) - return DenormalMode::getInvalid(); - - return parseDenormalFPAttribute(Val); + return parseDenormalFPAttribute(Attr.getValueAsString()); } /// Should we be emitting segmented stack stuff for the function Index: llvm/test/CodeGen/X86/pow.ll =================================================================== --- llvm/test/CodeGen/X86/pow.ll +++ llvm/test/CodeGen/X86/pow.ll @@ -9,8 +9,42 @@ declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) -define float @pow_f32_one_fourth_fmf(float %x) nounwind { -; CHECK-LABEL: pow_f32_one_fourth_fmf: +define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind { +; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee: +; CHECK: # %bb.0: +; CHECK-NEXT: rsqrtss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm0, %xmm3 +; CHECK-NEXT: mulss %xmm1, %xmm3 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps %xmm3, %xmm4 +; CHECK-NEXT: mulss %xmm2, %xmm4 +; CHECK-NEXT: mulss %xmm1, %xmm3 +; CHECK-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm5, %xmm3 +; CHECK-NEXT: mulss %xmm4, %xmm3 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: andps %xmm1, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: cmpltss %xmm4, %xmm0 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: rsqrtss %xmm0, %xmm3 +; CHECK-NEXT: andps %xmm0, %xmm1 +; CHECK-NEXT: mulss %xmm3, %xmm0 +; CHECK-NEXT: mulss %xmm0, %xmm2 +; CHECK-NEXT: mulss %xmm3, %xmm0 +; CHECK-NEXT: addss %xmm5, %xmm0 +; CHECK-NEXT: mulss %xmm2, %xmm0 +; CHECK-NEXT: cmpltss %xmm4, %xmm1 +; CHECK-NEXT: andnps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01) + ret float %r +} + +define float @pow_f32_one_fourth_fmf_daz(float %x) #0 { +; CHECK-LABEL: pow_f32_one_fourth_fmf_daz: ; CHECK: # %bb.0: ; CHECK-NEXT: rsqrtss %xmm0, %xmm1 ; 
CHECK-NEXT: movaps %xmm0, %xmm2 @@ -60,21 +94,26 @@ ; CHECK-NEXT: movaps %xmm2, %xmm4 ; CHECK-NEXT: mulps %xmm3, %xmm4 ; CHECK-NEXT: mulps %xmm1, %xmm2 -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] -; CHECK-NEXT: addps %xmm1, %xmm2 +; CHECK-NEXT: movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] +; CHECK-NEXT: addps %xmm5, %xmm2 ; CHECK-NEXT: mulps %xmm4, %xmm2 -; CHECK-NEXT: xorps %xmm4, %xmm4 -; CHECK-NEXT: cmpneqps %xmm4, %xmm0 -; CHECK-NEXT: andps %xmm2, %xmm0 -; CHECK-NEXT: rsqrtps %xmm0, %xmm2 -; CHECK-NEXT: movaps %xmm0, %xmm5 -; CHECK-NEXT: mulps %xmm2, %xmm5 -; CHECK-NEXT: mulps %xmm5, %xmm3 -; CHECK-NEXT: mulps %xmm2, %xmm5 -; CHECK-NEXT: addps %xmm1, %xmm5 -; CHECK-NEXT: mulps %xmm3, %xmm5 -; CHECK-NEXT: cmpneqps %xmm4, %xmm0 -; CHECK-NEXT: andps %xmm5, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: andps %xmm4, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; CHECK-NEXT: movaps %xmm1, %xmm6 +; CHECK-NEXT: cmpleps %xmm0, %xmm6 +; CHECK-NEXT: andps %xmm2, %xmm6 +; CHECK-NEXT: rsqrtps %xmm6, %xmm0 +; CHECK-NEXT: movaps %xmm6, %xmm2 +; CHECK-NEXT: mulps %xmm0, %xmm2 +; CHECK-NEXT: mulps %xmm2, %xmm3 +; CHECK-NEXT: mulps %xmm0, %xmm2 +; CHECK-NEXT: addps %xmm5, %xmm2 +; CHECK-NEXT: mulps %xmm3, %xmm2 +; CHECK-NEXT: andps %xmm4, %xmm6 +; CHECK-NEXT: cmpleps %xmm6, %xmm1 +; CHECK-NEXT: andps %xmm2, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> ) ret <4 x float> %r @@ -228,3 +267,4 @@ ret double %r } +attributes #0 = { nounwind "denormal-fp-math"="ieee,preserve-sign" } Index: llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -1,10 +1,42 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s -declare float @llvm.sqrt.f32(float) #0 +declare float @llvm.sqrt.f32(float) #2 -define float @foo(float %f) #0 { - ; CHECK-LABEL: name: foo +define float @sqrt_ieee(float %f) #0 { + ; CHECK-LABEL: name: sqrt_ieee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK: %3:fr32 = nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: %5:fr32 = nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: %7:fr32 = nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %8:fr32 = nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr + ; CHECK: %9:fr32 = nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr + ; CHECK: %10:fr32 = nofpexcept VFMADD213SSr %8, %9, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %11:fr32 = nofpexcept VMULSSrr %9, [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %12:fr32 = nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr + ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY %12 + ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]] + ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]] + ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]] + ; CHECK: %18:fr32 = nofpexcept VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load 4 from 
constant-pool) + ; CHECK: [[COPY4:%[0-9]+]]:vr128 = COPY %18 + ; CHECK: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]] + ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] + ; CHECK: $xmm0 = COPY [[COPY5]] + ; CHECK: RET 0, $xmm0 + %call = tail call float @llvm.sqrt.f32(float %f) + ret float %call +} + +define float @sqrt_daz(float %f) #1 { + ; CHECK-LABEL: name: sqrt_daz ; CHECK: bb.0 (%ir-block.0): ; CHECK: liveins: $xmm0 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 @@ -28,12 +60,36 @@ ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] ; CHECK: $xmm0 = COPY [[COPY3]] ; CHECK: RET 0, $xmm0 - %call = tail call float @llvm.sqrt.f32(float %f) #1 + %call = tail call float @llvm.sqrt.f32(float %f) ret float %call } -define float @rfoo(float %f) #0 { - ; CHECK-LABEL: name: rfoo +define float @rsqrt_ieee(float %f) #0 { + ; CHECK-LABEL: name: rsqrt_ieee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr [[VRSQRTSSr]], killed %3, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %7, killed %5, implicit $mxcsr + ; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr [[COPY]], %8, implicit $mxcsr + ; CHECK: 
%10:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VFMADD213SSr %8, killed %9, [[VMOVSSrm_alt]], implicit $mxcsr + ; CHECK: %11:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr %8, [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: %12:fr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept VMULSSrr killed %11, killed %10, implicit $mxcsr + ; CHECK: $xmm0 = COPY %12 + ; CHECK: RET 0, $xmm0 + %sqrt = tail call float @llvm.sqrt.f32(float %f) + %div = fdiv fast float 1.0, %sqrt + ret float %div +} + +define float @rsqrt_daz(float %f) #1 { + ; CHECK-LABEL: name: rsqrt_daz ; CHECK: bb.0 (%ir-block.0): ; CHECK: liveins: $xmm0 ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 @@ -56,5 +112,6 @@ ret float %div } -attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" } -attributes #1 = { nounwind readnone } +attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" } +attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" } +attributes #2 = { nounwind readnone } Index: llvm/test/CodeGen/X86/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -56,8 +56,55 @@ ret float %call } -define float @finite_f32_estimate(float %f) #1 { -; SSE-LABEL: finite_f32_estimate: +define float @finite_f32_estimate_ieee(float %f) #1 { +; SSE-LABEL: finite_f32_estimate_ieee: +; SSE: # %bb.0: +; SSE-NEXT: rsqrtss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: mulss %xmm1, %xmm2 +; SSE-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; SSE-NEXT: mulss %xmm2, %xmm3 +; SSE-NEXT: mulss %xmm1, %xmm2 +; SSE-NEXT: addss {{.*}}(%rip), %xmm2 +; SSE-NEXT: mulss %xmm3, %xmm2 +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: cmpltss {{.*}}(%rip), %xmm0 +; SSE-NEXT: andnps %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: 
finite_f32_estimate_ieee: +; AVX1: # %bb.0: +; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: finite_f32_estimate_ieee: +; AVX512: # %bb.0: +; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 +; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %k1 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 +; AVX512-NEXT: retq + %call = tail call float @__sqrtf_finite(float %f) #2 + ret float %call +} + +define float @finite_f32_estimate_daz(float %f) #4 { +; SSE-LABEL: finite_f32_estimate_daz: ; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -72,7 +119,7 @@ ; SSE-NEXT: andnps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: finite_f32_estimate: +; AVX1-LABEL: finite_f32_estimate_daz: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -85,7 +132,7 @@ ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: finite_f32_estimate: +; AVX512-LABEL: finite_f32_estimate_daz: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -531,4 +578,4 @@ attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" } attributes #2 = { nounwind readnone } attributes #3 
= { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" } - +attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }