Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -10838,7 +10838,7 @@ if (SDValue NewSel = foldBinOpIntoSelect(N)) return NewSel; - if (Options.UnsafeFPMath) { + if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) { // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP) { // Compute the reciprocal 1.0 / c2. Index: test/CodeGen/AMDGPU/fdiv.f16.ll =================================================================== --- test/CodeGen/AMDGPU/fdiv.f16.ll +++ test/CodeGen/AMDGPU/fdiv.f16.ll @@ -218,7 +218,7 @@ } ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16: -; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}} +; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}} ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}} ; GFX8_9: buffer_store_short [[MUL]] @@ -230,7 +230,7 @@ } ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16: -; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}} +; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}} ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}} ; GFX8_9: buffer_store_short [[MUL]] Index: test/CodeGen/X86/fmf-flags.ll =================================================================== --- test/CodeGen/X86/fmf-flags.ll +++ test/CodeGen/X86/fmf-flags.ll @@ -8,17 +8,11 @@ ; X64-LABEL: fast_recip_sqrt: ; X64: # %bb.0: ; X64-NEXT: rsqrtss %xmm0, %xmm1 -; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: cmpeqss %xmm0, %xmm2 ; X64-NEXT: mulss %xmm1, %xmm0 -; X64-NEXT: movss {{.*}}(%rip), %xmm3 -; X64-NEXT: mulss %xmm0, %xmm3 ; X64-NEXT: mulss %xmm1, %xmm0 ; X64-NEXT: addss {{.*}}(%rip), %xmm0 -; X64-NEXT: mulss %xmm3, %xmm0 -; X64-NEXT: andnps %xmm0, %xmm2 -; X64-NEXT: movss {{.*}}(%rip), %xmm0 -; X64-NEXT: divss %xmm2, %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 ; X64-NEXT: retq ; ; X86-LABEL: fast_recip_sqrt: @@ -89,10 +83,14 @@ define float @not_so_fast_recip_sqrt(float %x) { ; X64-LABEL: not_so_fast_recip_sqrt: ; X64: # %bb.0: -; X64-NEXT: sqrtss %xmm0, %xmm1 -; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X64-NEXT: divss %xmm1, %xmm0 -; X64-NEXT: movss %xmm1, {{.*}}(%rip) +; X64-NEXT: rsqrtss %xmm0, %xmm1 +; X64-NEXT: sqrtss %xmm0, %xmm2 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: addss {{.*}}(%rip), %xmm0 +; X64-NEXT: mulss {{.*}}(%rip), %xmm1 +; X64-NEXT: mulss %xmm1, %xmm0 +; X64-NEXT: movss %xmm2, sqrt1(%rip) ; X64-NEXT: retq ; ; X86-LABEL: not_so_fast_recip_sqrt: