Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9271,11 +9271,12 @@ // Faster 2.5 ULP division that does not support denormals. SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const { + SDNodeFlags Flags = Op->getFlags(); SDLoc SL(Op); SDValue LHS = Op.getOperand(1); SDValue RHS = Op.getOperand(2); - SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS); + SDValue r1 = DAG.getNode(ISD::FABS, SL, MVT::f32, RHS, Flags); const APFloat K0Val(0x1p+96f); const SDValue K0 = DAG.getConstantFP(K0Val, SL, MVT::f32); @@ -9290,17 +9291,16 @@ SDValue r2 = DAG.getSetCC(SL, SetCCVT, r1, K0, ISD::SETOGT); - SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One); + SDValue r3 = DAG.getNode(ISD::SELECT, SL, MVT::f32, r2, K1, One, Flags); - // TODO: Should this propagate fast-math-flags? - r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3); + r1 = DAG.getNode(ISD::FMUL, SL, MVT::f32, RHS, r3, Flags); // rcp does not support denormals. - SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1); + SDValue r0 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, r1, Flags); - SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0); + SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, LHS, r0, Flags); - return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul); + return DAG.getNode(ISD::FMUL, SL, MVT::f32, r3, Mul, Flags); } // Returns immediate value for setting the F32 denorm mode when using the Index: llvm/test/CodeGen/AMDGPU/fdiv.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv.ll +++ llvm/test/CodeGen/AMDGPU/fdiv.ll @@ -3828,8 +3828,7 @@ ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_mul_f32_e32 v0, v3, v0 -; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_fdiv_f32_daz_25ulp_contractable_user: @@ -5087,8 +5086,7 @@ ; GFX11-NEXT: v_rcp_f32_e32 v1, v1 ; GFX11-NEXT: s_waitcnt_depctr 0xfff ; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 -; GFX11-NEXT: v_mul_f32_e32 v0, v3, v0 -; GFX11-NEXT: v_add_f32_e32 v0, v0, v2 +; GFX11-NEXT: v_fma_f32 v0, v3, v0, v2 ; GFX11-NEXT: s_setpc_b64 s[30:31] ; ; EG-LABEL: v_fdiv_f32_daz_25ulp__nnan_ninf_contractable_user: