Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -2642,11 +2642,13 @@ return false; } - /// Returns true if the FADD or FSUB node passed could legally be combined with - /// an fmul to form an ISD::FMAD. - virtual bool isFMADLegalForFAddFSub(const SelectionDAG &DAG, - const SDNode *N) const { - assert(N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB); + /// Returns true if \p N can legally be combined to form an ISD::FMAD. \p N + /// may be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed + /// into an fadd/fsub. + virtual bool isFMADLegal(const SelectionDAG &DAG, + const SDNode *N) const { + assert(N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || + N->getOpcode() == ISD::FMUL); return isOperationLegal(ISD::FMAD, N->getValueType(0)); } Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11728,7 +11728,7 @@ const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -11945,7 +11945,7 @@ const TargetOptions &Options = DAG.getTarget().Options; // Floating-point multiply-add with intermediate rounding. - bool HasFMAD = (LegalOperations && TLI.isFMADLegalForFAddFSub(DAG, N)); + bool HasFMAD = (LegalOperations && TLI.isFMADLegal(DAG, N)); // Floating-point multiply-add without intermediate rounding. bool HasFMA = @@ -12289,7 +12289,7 @@ // Floating-point multiply-add with intermediate rounding. 
This can result // in a less precise result due to the changed rounding order. bool HasFMAD = Options.UnsafeFPMath && - (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT)); + (LegalOperations && TLI.isFMADLegal(DAG, N)); // No valid opcode, do not combine. if (!HasFMAD && !HasFMA) Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -358,8 +358,8 @@ MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override; bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override; - bool isFMADLegalForFAddFSub(const SelectionDAG &DAG, - const SDNode *N) const override; + bool isFMADLegal(const SelectionDAG &DAG, + const SDNode *N) const override; SDValue splitUnaryVectorOp(SDValue Op, SelectionDAG &DAG) const; SDValue splitBinaryVectorOp(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -3946,8 +3946,8 @@ return false; } -bool SITargetLowering::isFMADLegalForFAddFSub(const SelectionDAG &DAG, - const SDNode *N) const { +bool SITargetLowering::isFMADLegal(const SelectionDAG &DAG, + const SDNode *N) const { // TODO: Check future ftz flag // v_mad_f32/v_mac_f32 do not support denormals. 
EVT VT = N->getValueType(0); Index: llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fmad-formation-fmul-distribute-denormal-mode.ll @@ -0,0 +1,170 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s +; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s +; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s +; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,NOFUSE %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=ieee < %s | FileCheck -check-prefixes=GCN,FMA %s + +; RUN: llc -march=amdgcn -mcpu=tahiti -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s +; RUN: llc -march=amdgcn -mcpu=verde -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s +; RUN: llc -march=amdgcn -mcpu=fiji -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s +; RUN: llc -march=amdgcn -mcpu=tonga -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s +; RUN: llc -march=amdgcn -mcpu=gfx900 -denormal-fp-math-f32=preserve-sign < %s | FileCheck -check-prefixes=GCN,FMAD %s + +; Check for incorrect fmad formation when distributing + +define float @unsafe_fmul_fadd_distribute_fast_f32(float %arg0, float %arg1) #0 { +; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_f32: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, v1, v0, v0 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_f32: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; 
NOFUSE-NEXT: v_add_f32_e32 v1, 1.0, v1 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_f32: +; FMAD: ; %bb.0: +; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mac_f32_e32 v0, v1, v0 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %add = fadd fast float %arg1, 1.0 + %tmp1 = fmul fast float %arg0, %add + ret float %tmp1 +} + +define float @unsafe_fmul_fsub_distribute_fast_f32(float %arg0, float %arg1) #0 { +; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_f32: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, -v1, v0, v0 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_f32: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; NOFUSE-NEXT: v_sub_f32_e32 v1, 1.0, v1 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v1 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_f32: +; FMAD: ; %bb.0: +; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mad_f32 v0, -v1, v0, v0 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %add = fsub fast float 1.0, %arg1 + %tmp1 = fmul fast float %arg0, %add + ret float %tmp1 +} + +define <2 x float> @unsafe_fmul_fadd_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 { +; FMA-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, v2, v0, v0 +; FMA-NEXT: v_fma_f32 v1, v3, v1, v1 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; NOFUSE-NEXT: v_add_f32_e32 v3, 1.0, v3 +; NOFUSE-NEXT: v_add_f32_e32 v2, 1.0, v2 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2 +; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fmul_fadd_distribute_fast_v2f32: +; FMAD: ; %bb.0: 
+; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mac_f32_e32 v0, v2, v0 +; FMAD-NEXT: v_mac_f32_e32 v1, v3, v1 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %add = fadd fast <2 x float> %arg1, <float 1.0, float 1.0> + %tmp1 = fmul fast <2 x float> %arg0, %add + ret <2 x float> %tmp1 +} + +define <2 x float> @unsafe_fmul_fsub_distribute_fast_v2f32(<2 x float> %arg0, <2 x float> %arg1) #0 { +; FMA-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, -v2, v0, v0 +; FMA-NEXT: v_fma_f32 v1, -v3, v1, v1 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; NOFUSE-NEXT: v_sub_f32_e32 v3, 1.0, v3 +; NOFUSE-NEXT: v_sub_f32_e32 v2, 1.0, v2 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v0, v2 +; NOFUSE-NEXT: v_mul_f32_e32 v1, v1, v3 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fmul_fsub_distribute_fast_v2f32: +; FMAD: ; %bb.0: +; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mad_f32 v0, -v2, v0, v0 +; FMAD-NEXT: v_mad_f32 v1, -v3, v1, v1 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %add = fsub fast <2 x float> <float 1.0, float 1.0>, %arg1 + %tmp1 = fmul fast <2 x float> %arg0, %add + ret <2 x float> %tmp1 +} + +define <2 x float> @unsafe_fast_fmul_fadd_distribute_post_legalize_f32(float %arg0, <2 x float> %arg1) #0 { +; FMA-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, v0, v1, v1 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; NOFUSE-NEXT: v_add_f32_e32 v0, 1.0, v0 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fast_fmul_fadd_distribute_post_legalize_f32: +; FMAD: ; %bb.0: +; FMAD-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mad_f32 v0, v0, v1, v1 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %add = fadd fast float %arg0, 1.0 + %splat = insertelement <2 x float> undef, float %add, i32 0 + %tmp1 = fmul fast <2 x float> %arg1, %splat + ret <2 x float> %tmp1 +} + +define <2 x float> @unsafe_fast_fmul_fsub_ditribute_post_legalize(float %arg0, <2 x float> %arg1) #0 { +; FMA-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: +; FMA: ; %bb.0: +; FMA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMA-NEXT: v_fma_f32 v0, -v0, v1, v1 +; FMA-NEXT: s_setpc_b64 s[30:31] +; +; NOFUSE-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: +; NOFUSE: ; %bb.0: +; NOFUSE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; NOFUSE-NEXT: v_sub_f32_e32 v0, 1.0, v0 +; NOFUSE-NEXT: v_mul_f32_e32 v0, v1, v0 +; NOFUSE-NEXT: s_setpc_b64 s[30:31] +; +; FMAD-LABEL: unsafe_fast_fmul_fsub_ditribute_post_legalize: +; FMAD: ; %bb.0: +; FMAD-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; FMAD-NEXT: v_mad_f32 v0, -v0, v1, v1 +; FMAD-NEXT: s_setpc_b64 s[30:31] + %sub = fsub fast float 1.0, %arg0 + %splat = insertelement <2 x float> undef, float %sub, i32 0 + %tmp1 = fmul fast <2 x float> %arg1, %splat + ret <2 x float> %tmp1 +} + +attributes #0 = { "no-infs-fp-math"="true" "unsafe-fp-math"="true" }