Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -673,6 +673,7 @@
   // Don't allow anything with multiple uses unless we know it is free.
   EVT VT = Op.getValueType();
+  const SDNodeFlags Flags = Op->getFlags();
   if (!Op.hasOneUse())
     if (!(Op.getOpcode() == ISD::FP_EXTEND &&
           TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
       return 0;
@@ -717,7 +718,6 @@
     return 1;
 
   case ISD::FMUL:
-  case ISD::FDIV:
    if (Options->HonorSignDependentRoundingFPMath()) return 0;
 
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
@@ -727,6 +727,18 @@
     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                               Depth + 1);
+
+  case ISD::FDIV:
+    if (Options->HonorSignDependentRoundingFPMath() && !Flags.hasNoNaNs())
+      return 0;
+
+    // fold (fneg (fdiv X, Y)) -> (fdiv (fneg X), Y) or (fdiv X, (fneg Y))
+    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
+                                    Options, Depth + 1))
+      return V;
+
+    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
+                              Depth + 1);
 
   case ISD::FP_EXTEND:
   case ISD::FP_ROUND:
   case ISD::FSIN:
@@ -782,7 +794,7 @@
   case ISD::FMUL:
   case ISD::FDIV:
-    assert(!Options.HonorSignDependentRoundingFPMath());
+    assert(!Options.HonorSignDependentRoundingFPMath() || Flags.hasNoNaNs());
 
     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                            DAG.getTargetLoweringInfo(), &Options, Depth + 1))
@@ -10789,7 +10801,7 @@
   if (SDValue NewSel = foldBinOpIntoSelect(N))
     return NewSel;
 
-  if (Options.UnsafeFPMath) {
+  if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
     if (N1CFP) {
       // Compute the reciprocal 1.0 / c2.
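Note (editorial, not part of the patch): the new ISD::FDIV case mirrors the
existing FMUL handling, but when sign-dependent rounding is honored it only
fires if the node carries nnan. A minimal IR sketch of the pattern this
enables; the function name is hypothetical:

; (fneg (fdiv X, Y)) can now be folded to (fdiv (fneg X), Y) or
; (fdiv X, (fneg Y)) when the division is marked nnan.
define float @fneg_fdiv_nnan(float %x, float %y) {
  %div = fdiv nnan float %x, %y
  %neg = fsub float -0.000000e+00, %div   ; IR-level fneg
  ret float %neg
}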
Index: test/CodeGen/AMDGPU/fdiv.f16.ll
===================================================================
--- test/CodeGen/AMDGPU/fdiv.f16.ll
+++ test/CodeGen/AMDGPU/fdiv.f16.ll
@@ -218,7 +218,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0x3dccc000, v{{[0-9]+}}
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0x2e66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
 
@@ -230,7 +230,7 @@
 }
 
 ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f16:
-; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdcccccd, v{{[0-9]+}}
+; SI: v_mul_f32_e32 v{{[0-9]+}}, 0xbdccc000, v{{[0-9]+}}
 ; GFX8_9: v_mul_f16_e32 [[MUL:v[0-9]+]], 0xae66, v{{[0-9]+}}
 ; GFX8_9: buffer_store_short [[MUL]]
 
Index: test/CodeGen/X86/fmf-flags.ll
===================================================================
--- test/CodeGen/X86/fmf-flags.ll
+++ test/CodeGen/X86/fmf-flags.ll
@@ -8,17 +8,11 @@
 ; X64-LABEL: fast_recip_sqrt:
 ; X64:       # %bb.0:
 ; X64-NEXT:    rsqrtss %xmm0, %xmm1
-; X64-NEXT:    xorps %xmm2, %xmm2
-; X64-NEXT:    cmpeqss %xmm0, %xmm2
 ; X64-NEXT:    mulss %xmm1, %xmm0
-; X64-NEXT:    movss {{.*}}(%rip), %xmm3
-; X64-NEXT:    mulss %xmm0, %xmm3
 ; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    addss {{.*}}(%rip), %xmm0
-; X64-NEXT:    mulss %xmm3, %xmm0
-; X64-NEXT:    andnps %xmm0, %xmm2
-; X64-NEXT:    movss {{.*}}(%rip), %xmm0
-; X64-NEXT:    divss %xmm2, %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: fast_recip_sqrt:
@@ -89,10 +83,14 @@
 define float @not_so_fast_recip_sqrt(float %x) {
 ; X64-LABEL: not_so_fast_recip_sqrt:
 ; X64:       # %bb.0:
-; X64-NEXT:    sqrtss %xmm0, %xmm1
-; X64-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; X64-NEXT:    divss %xmm1, %xmm0
-; X64-NEXT:    movss %xmm1, {{.*}}(%rip)
+; X64-NEXT:    rsqrtss %xmm0, %xmm1
+; X64-NEXT:    sqrtss %xmm0, %xmm2
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    addss {{.*}}(%rip), %xmm0
+; X64-NEXT:    mulss {{.*}}(%rip), %xmm1
+; X64-NEXT:    mulss %xmm1, %xmm0
+; X64-NEXT:    movss %xmm2, sqrt1(%rip)
 ; X64-NEXT:    retq
 ;
 ; X86-LABEL: not_so_fast_recip_sqrt:
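Note (editorial, not part of the patch): the AMDGPU constant changes appear to
fall out of the visitFDIV change. With arcp on the fdiv, the fold now fires at
f16 before SI promotes the operation to f32, so the reciprocal is formed at
half precision (0x2e66, ~0.1 in f16) and then extended to f32, giving
0x3dccc000 rather than the f32 reciprocal 0x3dcccccd. A sketch of the kind of
input that now folds without -enable-unsafe-fp-math; the function name is
hypothetical:

; With arcp, (fdiv %x, 10.0) may be rewritten as a multiply by the
; (inexact) reciprocal of 10.0, even though global unsafe-fp-math is off.
define float @recip_by_constant(float %x) {
  %div = fdiv arcp float %x, 1.000000e+01
  ret float %div
}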