Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -848,7 +848,22 @@ return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, ForCodeSize, Depth + 1); + case ISD::FMA: + case ISD::FMAD: { + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + char V2 = isNegatibleForFree(Op.getOperand(2), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1); + if (!V2) + return 0; + + if (char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1)) + return V0; + return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, + Options, ForCodeSize, Depth + 1); + } case ISD::FP_EXTEND: case ISD::FP_ROUND: case ISD::FSIN: @@ -937,6 +952,28 @@ LegalOperations, ForCodeSize, Depth + 1), Flags); + case ISD::FMA: + case ISD::FMAD: { + SDValue Neg2 = GetNegatedExpression(Op.getOperand(2), DAG, LegalOperations, + ForCodeSize, Depth + 1); + + // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z)) + if (isNegatibleForFree(Op.getOperand(0), LegalOperations, + DAG.getTargetLoweringInfo(), &Options, ForCodeSize, + Depth + 1)) { + SDValue Neg0 = GetNegatedExpression( + Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0, + Op.getOperand(1), Neg2, Flags); + } + + // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z)) + SDValue Neg1 = GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, + ForCodeSize, Depth + 1); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), + Op.getOperand(0), Neg1, Neg2, Flags); + } + case ISD::FP_EXTEND: case ISD::FSIN: return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Index: test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-combines.ll +++ test/CodeGen/AMDGPU/fneg-combines.ll @@ -1230,13 +1230,9 @@ ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] - -; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]] -; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] - -; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] -; GCN-NSZ-NOT: [[FMA]] -; GCN-NSZ: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] +; GCN: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] +; GCN-NOT: [[FMA]] +; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[FMA]] define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { %tid = call i32 @llvm.amdgcn.workitem.id.x() %tid.ext = sext i32 %tid to i64