Index: llvm/test/CodeGen/X86/pow.ll =================================================================== --- llvm/test/CodeGen/X86/pow.ll +++ llvm/test/CodeGen/X86/pow.ll @@ -9,8 +9,42 @@ declare x86_fp80 @llvm.pow.f80(x86_fp80, x86_fp80) -define float @pow_f32_one_fourth_fmf(float %x) nounwind { -; CHECK-LABEL: pow_f32_one_fourth_fmf: +define float @pow_f32_one_fourth_fmf_ieee(float %x) nounwind { +; CHECK-LABEL: pow_f32_one_fourth_fmf_ieee: +; CHECK: # %bb.0: +; CHECK-NEXT: rsqrtss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm0, %xmm3 +; CHECK-NEXT: mulss %xmm1, %xmm3 +; CHECK-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: movaps %xmm3, %xmm4 +; CHECK-NEXT: mulss %xmm2, %xmm4 +; CHECK-NEXT: mulss %xmm1, %xmm3 +; CHECK-NEXT: movss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: addss %xmm5, %xmm3 +; CHECK-NEXT: mulss %xmm4, %xmm3 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: andps %xmm1, %xmm0 +; CHECK-NEXT: movss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: cmpltss %xmm4, %xmm0 +; CHECK-NEXT: andnps %xmm3, %xmm0 +; CHECK-NEXT: xorps %xmm3, %xmm3 +; CHECK-NEXT: rsqrtss %xmm0, %xmm3 +; CHECK-NEXT: andps %xmm0, %xmm1 +; CHECK-NEXT: mulss %xmm3, %xmm0 +; CHECK-NEXT: mulss %xmm0, %xmm2 +; CHECK-NEXT: mulss %xmm3, %xmm0 +; CHECK-NEXT: addss %xmm5, %xmm0 +; CHECK-NEXT: mulss %xmm2, %xmm0 +; CHECK-NEXT: cmpltss %xmm4, %xmm1 +; CHECK-NEXT: andnps %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq + %r = call nsz ninf afn float @llvm.pow.f32(float %x, float 2.5e-01) + ret float %r +} + +define float @pow_f32_one_fourth_fmf_daz(float %x) #0 { +; CHECK-LABEL: pow_f32_one_fourth_fmf_daz: ; CHECK: # %bb.0: ; CHECK-NEXT: rsqrtss %xmm0, %xmm1 ; CHECK-NEXT: movaps %xmm0, %xmm2 @@ -60,21 +94,26 @@ ; CHECK-NEXT: movaps %xmm2, %xmm4 ; CHECK-NEXT: mulps %xmm3, %xmm4 ; CHECK-NEXT: mulps %xmm1, %xmm2 -; CHECK-NEXT: movaps {{.*#+}} xmm1 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] -; CHECK-NEXT: addps %xmm1, %xmm2 +; CHECK-NEXT: 
movaps {{.*#+}} xmm5 = [-3.0E+0,-3.0E+0,-3.0E+0,-3.0E+0] +; CHECK-NEXT: addps %xmm5, %xmm2 ; CHECK-NEXT: mulps %xmm4, %xmm2 -; CHECK-NEXT: xorps %xmm4, %xmm4 -; CHECK-NEXT: cmpneqps %xmm4, %xmm0 -; CHECK-NEXT: andps %xmm2, %xmm0 -; CHECK-NEXT: rsqrtps %xmm0, %xmm2 -; CHECK-NEXT: movaps %xmm0, %xmm5 -; CHECK-NEXT: mulps %xmm2, %xmm5 -; CHECK-NEXT: mulps %xmm5, %xmm3 -; CHECK-NEXT: mulps %xmm2, %xmm5 -; CHECK-NEXT: addps %xmm1, %xmm5 -; CHECK-NEXT: mulps %xmm3, %xmm5 -; CHECK-NEXT: cmpneqps %xmm4, %xmm0 -; CHECK-NEXT: andps %xmm5, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm4 = [NaN,NaN,NaN,NaN] +; CHECK-NEXT: andps %xmm4, %xmm0 +; CHECK-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38] +; CHECK-NEXT: movaps %xmm1, %xmm6 +; CHECK-NEXT: cmpleps %xmm0, %xmm6 +; CHECK-NEXT: andps %xmm2, %xmm6 +; CHECK-NEXT: rsqrtps %xmm6, %xmm0 +; CHECK-NEXT: movaps %xmm6, %xmm2 +; CHECK-NEXT: mulps %xmm0, %xmm2 +; CHECK-NEXT: mulps %xmm2, %xmm3 +; CHECK-NEXT: mulps %xmm0, %xmm2 +; CHECK-NEXT: addps %xmm5, %xmm2 +; CHECK-NEXT: mulps %xmm3, %xmm2 +; CHECK-NEXT: andps %xmm4, %xmm6 +; CHECK-NEXT: cmpleps %xmm6, %xmm1 +; CHECK-NEXT: andps %xmm2, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 ; CHECK-NEXT: retq %r = call fast <4 x float> @llvm.pow.v4f32(<4 x float> %x, <4 x float> <float 2.5e-1, float 2.5e-1, float 2.5e-01, float 2.5e-01>) ret <4 x float> %r @@ -228,3 +267,4 @@ ret double %r } +attributes #0 = { nounwind "denormal-fp-math"="ieee,preserve-sign" } Index: llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath-mir.ll @@ -1,52 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2,fma -stop-after=finalize-isel 2>&1 | FileCheck %s -declare float @llvm.sqrt.f32(float) #0 +declare float @llvm.sqrt.f32(float) #2 -define float @foo(float %f) #0 { -; CHECK: {{name: *foo}} -; 
CHECK: body: -; CHECK: %0:fr32 = COPY $xmm0 -; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0 -; CHECK: %3:fr32 = VMULSSrr %0, %1 -; CHECK: %4:fr32 = VMOVSSrm -; CHECK: %5:fr32 = VFMADD213SSr %1, killed %3, %4 -; CHECK: %6:fr32 = VMOVSSrm -; CHECK: %7:fr32 = VMULSSrr %1, %6 -; CHECK: %8:fr32 = VMULSSrr killed %7, killed %5 -; CHECK: %9:fr32 = VMULSSrr %0, %8 -; CHECK: %10:fr32 = VFMADD213SSr %8, %9, %4 -; CHECK: %11:fr32 = VMULSSrr %9, %6 -; CHECK: %12:fr32 = VMULSSrr killed %11, killed %10 -; CHECK: %14:fr32 = FsFLD0SS -; CHECK: %15:fr32 = VCMPSSrr %0, killed %14, 0 -; CHECK: %17:vr128 = VPANDNrr killed %16, killed %13 -; CHECK: $xmm0 = COPY %18 -; CHECK: RET 0, $xmm0 - %call = tail call float @llvm.sqrt.f32(float %f) #1 +define float @sqrt_ieee(float %f) #0 { + ; CHECK-LABEL: name: sqrt_ieee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK: [[VMULSSrr:%[0-9]+]]:fr32 = VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VFMADD213SSr:%[0-9]+]]:fr32 = VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]] + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMULSSrr1:%[0-9]+]]:fr32 = VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: [[VMULSSrr2:%[0-9]+]]:fr32 = VMULSSrr killed [[VMULSSrr1]], killed [[VFMADD213SSr]], implicit $mxcsr + ; CHECK: [[VMULSSrr3:%[0-9]+]]:fr32 = VMULSSrr [[COPY]], [[VMULSSrr2]], implicit $mxcsr + ; CHECK: [[VFMADD213SSr1:%[0-9]+]]:fr32 = VFMADD213SSr [[VMULSSrr2]], [[VMULSSrr3]], [[VMOVSSrm_alt]] + ; CHECK: [[VMULSSrr4:%[0-9]+]]:fr32 = VMULSSrr [[VMULSSrr3]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: 
[[VMULSSrr5:%[0-9]+]]:fr32 = VMULSSrr killed [[VMULSSrr4]], killed [[VFMADD213SSr1]], implicit $mxcsr + ; CHECK: [[COPY1:%[0-9]+]]:vr128 = COPY [[VMULSSrr5]] + ; CHECK: [[COPY2:%[0-9]+]]:vr128 = COPY [[COPY]] + ; CHECK: [[VPBROADCASTDrm:%[0-9]+]]:vr128 = VPBROADCASTDrm $rip, 1, $noreg, %const.2, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VPANDrr:%[0-9]+]]:vr128 = VPANDrr killed [[COPY2]], killed [[VPBROADCASTDrm]] + ; CHECK: [[COPY3:%[0-9]+]]:fr32 = COPY [[VPANDrr]] + ; CHECK: [[VCMPSSrm:%[0-9]+]]:fr32 = VCMPSSrm killed [[COPY3]], $rip, 1, $noreg, %const.3, $noreg, 1, implicit $mxcsr :: (load 4 from constant-pool) + ; CHECK: [[COPY4:%[0-9]+]]:vr128 = COPY [[VCMPSSrm]] + ; CHECK: [[VPANDNrr:%[0-9]+]]:vr128 = VPANDNrr killed [[COPY4]], killed [[COPY1]] + ; CHECK: [[COPY5:%[0-9]+]]:fr32 = COPY [[VPANDNrr]] + ; CHECK: $xmm0 = COPY [[COPY5]] + ; CHECK: RET 0, $xmm0 + %call = tail call float @llvm.sqrt.f32(float %f) ret float %call } -define float @rfoo(float %f) #0 { -; CHECK: {{name: *rfoo}} -; CHECK: body: | -; CHECK: %0:fr32 = COPY $xmm0 -; CHECK: %1:fr32 = VRSQRTSSr killed %2, %0 -; CHECK: %3:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %1 -; CHECK: %4:fr32 = VMOVSSrm -; CHECK: %5:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %1, killed %3, %4 -; CHECK: %6:fr32 = VMOVSSrm -; CHECK: %7:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %1, %6 -; CHECK: %8:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %7, killed %5 -; CHECK: %9:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %0, %8 -; CHECK: %10:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr %8, killed %9, %4 -; CHECK: %11:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr %8, %6 -; CHECK: %12:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed %11, killed %10 -; CHECK: $xmm0 = COPY %12 -; CHECK: RET 0, $xmm0 +define float @sqrt_daz(float %f) #2 { + ; CHECK-LABEL: name: sqrt_daz + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: 
liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VSQRTSSr:%[0-9]+]]:fr32 = VSQRTSSr killed [[DEF]], [[COPY]], implicit $mxcsr + ; CHECK: $xmm0 = COPY [[VSQRTSSr]] + ; CHECK: RET 0, $xmm0 + %call = tail call float @llvm.sqrt.f32(float %f) + ret float %call +} + +define float @rsqrt_ieee(float %f) #0 { + ; CHECK-LABEL: name: rsqrt_ieee + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK: [[VMULSSrr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VFMADD213SSr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]] + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMULSSrr1:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: [[VMULSSrr2:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed [[VMULSSrr1]], killed [[VFMADD213SSr]], implicit $mxcsr + ; CHECK: [[VMULSSrr3:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[COPY]], [[VMULSSrr2]], implicit $mxcsr + ; CHECK: [[VFMADD213SSr1:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr [[VMULSSrr2]], killed [[VMULSSrr3]], [[VMOVSSrm_alt]] + ; CHECK: [[VMULSSrr4:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[VMULSSrr2]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: [[VMULSSrr5:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed [[VMULSSrr4]], killed 
[[VFMADD213SSr1]], implicit $mxcsr + ; CHECK: $xmm0 = COPY [[VMULSSrr5]] + ; CHECK: RET 0, $xmm0 + %sqrt = tail call float @llvm.sqrt.f32(float %f) + %div = fdiv fast float 1.0, %sqrt + ret float %div +} + +define float @rsqrt_daz(float %f) #1 { + ; CHECK-LABEL: name: rsqrt_daz + ; CHECK: bb.0 (%ir-block.0): + ; CHECK: liveins: $xmm0 + ; CHECK: [[COPY:%[0-9]+]]:fr32 = COPY $xmm0 + ; CHECK: [[DEF:%[0-9]+]]:fr32 = IMPLICIT_DEF + ; CHECK: [[VRSQRTSSr:%[0-9]+]]:fr32 = VRSQRTSSr killed [[DEF]], [[COPY]] + ; CHECK: [[VMULSSrr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[COPY]], [[VRSQRTSSr]], implicit $mxcsr + ; CHECK: [[VMOVSSrm_alt:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VFMADD213SSr:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr [[VRSQRTSSr]], killed [[VMULSSrr]], [[VMOVSSrm_alt]] + ; CHECK: [[VMOVSSrm_alt1:%[0-9]+]]:fr32 = VMOVSSrm_alt $rip, 1, $noreg, %const.1, $noreg :: (load 4 from constant-pool) + ; CHECK: [[VMULSSrr1:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[VRSQRTSSr]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: [[VMULSSrr2:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed [[VMULSSrr1]], killed [[VFMADD213SSr]], implicit $mxcsr + ; CHECK: [[VMULSSrr3:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[COPY]], [[VMULSSrr2]], implicit $mxcsr + ; CHECK: [[VFMADD213SSr1:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VFMADD213SSr [[VMULSSrr2]], killed [[VMULSSrr3]], [[VMOVSSrm_alt]] + ; CHECK: [[VMULSSrr4:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr [[VMULSSrr2]], [[VMOVSSrm_alt1]], implicit $mxcsr + ; CHECK: [[VMULSSrr5:%[0-9]+]]:fr32 = nnan ninf nsz arcp contract afn reassoc VMULSSrr killed [[VMULSSrr4]], killed [[VFMADD213SSr1]], implicit $mxcsr + ; CHECK: $xmm0 = COPY [[VMULSSrr5]] + ; CHECK: RET 0, $xmm0 %sqrt = tail call float @llvm.sqrt.f32(float %f) 
%div = fdiv fast float 1.0, %sqrt ret float %div } -attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" } -attributes #1 = { nounwind readnone } +attributes #0 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,ieee" } +attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt:2" "denormal-fp-math"="ieee,preserve-sign" } +attributes #2 = { nounwind readnone } Index: llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll @@ -10,13 +10,13 @@ declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0 declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0 -define float @foo_x1(float %f) #0 { -; SCALAR-EST-LABEL: foo_x1: +define float @foo_x1_ieee(float %f) #0 { +; SCALAR-EST-LABEL: foo_x1_ieee: ; SCALAR-EST: # %bb.0: ; SCALAR-EST-NEXT: rsqrtss %xmm0 ; SCALAR-EST: retq ; -; SCALAR-ACC-LABEL: foo_x1: +; SCALAR-ACC-LABEL: foo_x1_ieee: ; SCALAR-ACC: # %bb.0: ; SCALAR-ACC-NEXT: {{^ *v?sqrtss %xmm0}} ; SCALAR-ACC-NEXT: retq @@ -24,13 +24,41 @@ ret float %call } -define <4 x float> @foo_x4(<4 x float> %f) #0 { -; VECTOR-EST-LABEL: foo_x4: +define float @foo_x1_daz(float %f) #2 { +; SCALAR-EST-LABEL: foo_x1_daz: +; SCALAR-EST: # %bb.0: +; SCALAR-EST-NEXT: rsqrtss %xmm0 +; SCALAR-EST: retq +; +; SCALAR-ACC-LABEL: foo_x1_daz: +; SCALAR-ACC: # %bb.0: +; SCALAR-ACC-NEXT: {{^ *v?sqrtss %xmm0}} +; SCALAR-ACC-NEXT: retq + %call = tail call float @llvm.sqrt.f32(float %f) #1 + ret float %call +} + +define <4 x float> @foo_x4_ieee(<4 x float> %f) #0 { +; VECTOR-EST-LABEL: foo_x4_ieee: +; VECTOR-EST: # %bb.0: +; VECTOR-EST-NEXT: rsqrtps %xmm0 +; VECTOR-EST: retq +; +; VECTOR-ACC-LABEL: foo_x4_ieee: +; VECTOR-ACC: # %bb.0: +; VECTOR-ACC-NEXT: {{^ *v?sqrtps %xmm0}} +; VECTOR-ACC-NEXT: retq + %call = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> %f) #1 + ret <4 x float> %call 
+} + +define <4 x float> @foo_x4_daz(<4 x float> %f) #2 { +; VECTOR-EST-LABEL: foo_x4_daz: ; VECTOR-EST: # %bb.0: ; VECTOR-EST-NEXT: rsqrtps %xmm0 ; VECTOR-EST: retq ; -; VECTOR-ACC-LABEL: foo_x4: +; VECTOR-ACC-LABEL: foo_x4_daz: ; VECTOR-ACC: # %bb.0: ; VECTOR-ACC-NEXT: {{^ *v?sqrtps %xmm0}} ; VECTOR-ACC-NEXT: retq @@ -38,13 +66,28 @@ ret <4 x float> %call } -define <8 x float> @foo_x8(<8 x float> %f) #0 { -; VECTOR-EST-LABEL: foo_x8: +define <8 x float> @foo_x8_ieee(<8 x float> %f) #0 { +; VECTOR-EST-LABEL: foo_x8_ieee: +; VECTOR-EST: # %bb.0: +; VECTOR-EST: rsqrtps +; VECTOR-EST: retq +; +; VECTOR-ACC-LABEL: foo_x8_ieee: +; VECTOR-ACC: # %bb.0: +; VECTOR-ACC: {{^ *v?sqrtps %[xy]mm0}} +; VECTOR-ACC-NOT: rsqrt +; VECTOR-ACC: retq + %call = tail call <8 x float> @llvm.sqrt.v8f32(<8 x float> %f) #1 + ret <8 x float> %call +} + +define <8 x float> @foo_x8_daz(<8 x float> %f) #2 { +; VECTOR-EST-LABEL: foo_x8_daz: ; VECTOR-EST: # %bb.0: ; VECTOR-EST-NEXT: rsqrtps ; VECTOR-EST: retq ; -; VECTOR-ACC-LABEL: foo_x8: +; VECTOR-ACC-LABEL: foo_x8_daz: ; VECTOR-ACC: # %bb.0: ; VECTOR-ACC-NEXT: {{^ *v?sqrtps %[xy]mm0}} ; VECTOR-ACC-NOT: rsqrt @@ -53,5 +96,6 @@ ret <8 x float> %call } -attributes #0 = { "unsafe-fp-math"="true" } +attributes #0 = { "denormal-fp-math"="ieee,ieee" "unsafe-fp-math"="true" } attributes #1 = { nounwind readnone } +attributes #2 = { "denormal-fp-math"="ieee,preserve-sign" "unsafe-fp-math"="true" } Index: llvm/test/CodeGen/X86/sqrt-fastmath.ll =================================================================== --- llvm/test/CodeGen/X86/sqrt-fastmath.ll +++ llvm/test/CodeGen/X86/sqrt-fastmath.ll @@ -56,8 +56,55 @@ ret float %call } -define float @finite_f32_estimate(float %f) #1 { -; SSE-LABEL: finite_f32_estimate: +define float @finite_f32_estimate_ieee(float %f) #1 { +; SSE-LABEL: finite_f32_estimate_ieee: +; SSE: # %bb.0: +; SSE-NEXT: rsqrtss %xmm0, %xmm1 +; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: mulss %xmm1, %xmm2 +; SSE-NEXT: movss {{.*#+}} xmm3 
= mem[0],zero,zero,zero +; SSE-NEXT: mulss %xmm2, %xmm3 +; SSE-NEXT: mulss %xmm1, %xmm2 +; SSE-NEXT: addss {{.*}}(%rip), %xmm2 +; SSE-NEXT: mulss %xmm3, %xmm2 +; SSE-NEXT: andps {{.*}}(%rip), %xmm0 +; SSE-NEXT: cmpltss {{.*}}(%rip), %xmm0 +; SSE-NEXT: andnps %xmm2, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: finite_f32_estimate_ieee: +; AVX1: # %bb.0: +; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vaddss {{.*}}(%rip), %xmm1, %xmm1 +; AVX1-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 +; AVX1-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX1-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %xmm0 +; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX512-LABEL: finite_f32_estimate_ieee: +; AVX512: # %bb.0: +; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 +; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 +; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm1 = (xmm2 * xmm1) + mem +; AVX512-NEXT: vmulss {{.*}}(%rip), %xmm2, %xmm2 +; AVX512-NEXT: vmulss %xmm1, %xmm2, %xmm1 +; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN] +; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0 +; AVX512-NEXT: vcmpltss {{.*}}(%rip), %xmm0, %k1 +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; AVX512-NEXT: vmovss %xmm0, %xmm1, %xmm1 {%k1} +; AVX512-NEXT: vmovaps %xmm1, %xmm0 +; AVX512-NEXT: retq + %call = tail call float @__sqrtf_finite(float %f) #2 + ret float %call +} + +define float @finite_f32_estimate_daz(float %f) #4 { +; SSE-LABEL: finite_f32_estimate_daz: ; SSE: # %bb.0: ; SSE-NEXT: rsqrtss %xmm0, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -72,7 +119,7 @@ ; SSE-NEXT: andnps %xmm2, %xmm0 ; SSE-NEXT: retq ; -; AVX1-LABEL: finite_f32_estimate: +; AVX1-LABEL: finite_f32_estimate_daz: ; AVX1: # %bb.0: ; AVX1-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX1-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -85,7 +132,7 @@ ; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: retq ; -; AVX512-LABEL: 
finite_f32_estimate: +; AVX512-LABEL: finite_f32_estimate_daz: ; AVX512: # %bb.0: ; AVX512-NEXT: vrsqrtss %xmm0, %xmm0, %xmm1 ; AVX512-NEXT: vmulss %xmm1, %xmm0, %xmm2 @@ -531,4 +578,4 @@ attributes #1 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" } attributes #2 = { nounwind readnone } attributes #3 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee" } - +attributes #4 = { "unsafe-fp-math"="true" "reciprocal-estimates"="sqrt,vec-sqrt" "denormal-fp-math"="ieee,preserve-sign" }