Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4303,6 +4303,11 @@ } else Result = lowerRangeToAssertZExt(DAG, I, Result); + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { + SDNodeFlags Flags; + Flags.copyFMF(*FPOp); + Result->setFlags(Flags); + } setValue(&I, Result); } } @@ -6766,8 +6771,15 @@ if (IID) { RenameFn = visitIntrinsicCall(I, IID); - if (!RenameFn) + if (!RenameFn) { + if (auto *FPOp = dyn_cast<FPMathOperator>(&I)) { + SDNodeFlags Flags; + Flags.copyFMF(*FPOp); + SDValue Res = getValue(&I); + Res->setFlags(Flags); + } return; + } } } Index: test/CodeGen/PowerPC/fmf-propagation.ll =================================================================== --- test/CodeGen/PowerPC/fmf-propagation.ll +++ test/CodeGen/PowerPC/fmf-propagation.ll @@ -156,7 +156,7 @@ ; This is the minimum FMF needed for this transform - the FMA allows reassociation. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:' @@ -192,7 +192,7 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:' @@ -228,7 +228,7 @@ ; The FMA is now fully 'fast'. This implies that reassociation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:' @@ -264,7 +264,7 @@ ; This shouldn't change anything - the intermediate fmul result is now also flagged. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:' -; FMFDEBUG: fma {{t[0-9]+}} +; FMFDEBUG: fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:' @@ -300,7 +300,7 @@ ; Reduced precision for sqrt is allowed - should use estimate and NR iterations. ; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' -; FMFDEBUG: fsqrt {{t[0-9]+}} +; FMFDEBUG: fsqrt afn {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_afn:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:' @@ -340,7 +340,7 @@ ; The call is now fully 'fast'. This implies that approximation is allowed. 
; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' -; FMFDEBUG: fsqrt {{t[0-9]+}} +; FMFDEBUG: fsqrt nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}} ; FMFDEBUG: Type-legalized selection DAG: %bb.0 'sqrt_fast:' ; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:' Index: test/CodeGen/X86/fmaxnum.ll =================================================================== --- test/CodeGen/X86/fmaxnum.ll +++ test/CodeGen/X86/fmaxnum.ll @@ -290,21 +290,12 @@ define double @maxnum_intrinsic_nnan_fmf_f64(double %a, double %b) { ; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f64: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm0, %xmm2 -; SSE-NEXT: cmpunordsd %xmm0, %xmm2 -; SSE-NEXT: movapd %xmm2, %xmm3 -; SSE-NEXT: andpd %xmm1, %xmm3 -; SSE-NEXT: maxsd %xmm0, %xmm1 -; SSE-NEXT: andnpd %xmm1, %xmm2 -; SSE-NEXT: orpd %xmm3, %xmm2 -; SSE-NEXT: movapd %xmm2, %xmm0 +; SSE-NEXT: maxsd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f64: ; AVX: # %bb.0: -; AVX-NEXT: vmaxsd %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordsd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %r = tail call nnan double @llvm.maxnum.f64(double %a, double %b) ret double %r @@ -315,19 +306,12 @@ define <4 x float> @maxnum_intrinsic_nnan_fmf_f432(<4 x float> %a, <4 x float> %b) { ; SSE-LABEL: maxnum_intrinsic_nnan_fmf_f432: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: maxps %xmm0, %xmm2 -; SSE-NEXT: cmpunordps %xmm0, %xmm0 -; SSE-NEXT: andps %xmm0, %xmm1 -; SSE-NEXT: andnps %xmm2, %xmm0 -; SSE-NEXT: orps %xmm1, %xmm0 +; SSE-NEXT: maxps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: maxnum_intrinsic_nnan_fmf_f432: ; AVX: # %bb.0: -; AVX-NEXT: vmaxps %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %r = tail call nnan <4 x float> @llvm.maxnum.v4f32(<4 x 
float> %a, <4 x float> %b) ret <4 x float> %r Index: test/CodeGen/X86/fminnum.ll =================================================================== --- test/CodeGen/X86/fminnum.ll +++ test/CodeGen/X86/fminnum.ll @@ -282,21 +282,12 @@ define float @minnum_intrinsic_nnan_fmf_f32(float %a, float %b) { ; SSE-LABEL: minnum_intrinsic_nnan_fmf_f32: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: cmpunordss %xmm0, %xmm2 -; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: andps %xmm1, %xmm3 -; SSE-NEXT: minss %xmm0, %xmm1 -; SSE-NEXT: andnps %xmm1, %xmm2 -; SSE-NEXT: orps %xmm3, %xmm2 -; SSE-NEXT: movaps %xmm2, %xmm0 +; SSE-NEXT: minss %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: minnum_intrinsic_nnan_fmf_f32: ; AVX: # %bb.0: -; AVX-NEXT: vminss %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordss %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %r = tail call nnan float @llvm.minnum.f32(float %a, float %b) ret float %r @@ -307,19 +298,12 @@ define <2 x double> @minnum_intrinsic_nnan_fmf_v2f64(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: minnum_intrinsic_nnan_fmf_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movapd %xmm1, %xmm2 -; SSE-NEXT: minpd %xmm0, %xmm2 -; SSE-NEXT: cmpunordpd %xmm0, %xmm0 -; SSE-NEXT: andpd %xmm0, %xmm1 -; SSE-NEXT: andnpd %xmm2, %xmm0 -; SSE-NEXT: orpd %xmm1, %xmm0 +; SSE-NEXT: minpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: minnum_intrinsic_nnan_fmf_v2f64: ; AVX: # %bb.0: -; AVX-NEXT: vminpd %xmm0, %xmm1, %xmm2 -; AVX-NEXT: vcmpunordpd %xmm0, %xmm0, %xmm0 -; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm0 +; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 ; AVX-NEXT: retq %r = tail call nnan <2 x double> @llvm.minnum.v2f64(<2 x double> %a, <2 x double> %b) ret <2 x double> %r Index: test/CodeGen/X86/pr34149.ll =================================================================== --- test/CodeGen/X86/pr34149.ll +++ test/CodeGen/X86/pr34149.ll @@ -8,9 +8,7 @@ define <4 x double> 
@via_minnum(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: via_minnum: ; CHECK: # %bb.0: -; CHECK-NEXT: vminpd %ymm0, %ymm1, %ymm2 -; CHECK-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 -; CHECK-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vminpd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq %z = call fast <4 x double> @llvm.minnum.v4f64(<4 x double> %x, <4 x double> %y) readnone ret <4 x double> %z @@ -19,9 +17,7 @@ define <4 x double> @via_maxnum(<4 x double> %x, <4 x double> %y) { ; CHECK-LABEL: via_maxnum: ; CHECK: # %bb.0: -; CHECK-NEXT: vmaxpd %ymm0, %ymm1, %ymm2 -; CHECK-NEXT: vcmpunordpd %ymm0, %ymm0, %ymm0 -; CHECK-NEXT: vblendvpd %ymm0, %ymm1, %ymm2, %ymm0 +; CHECK-NEXT: vmaxpd %ymm1, %ymm0, %ymm0 ; CHECK-NEXT: retq %z = call fast <4 x double> @llvm.maxnum.v4f64(<4 x double> %x, <4 x double> %y) readnone ret <4 x double> %z