Index: lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3712,6 +3712,8 @@ RHS = RHS.getOperand(0); SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); + if (Res.getOpcode() != ISD::FADD) + return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; @@ -3731,6 +3733,8 @@ RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); + if (Res.getOpcode() != Opc) + return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; @@ -3758,6 +3762,8 @@ RHS = RHS.getOperand(0); SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); + if (Res.getOpcode() != Opc) + return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; @@ -3786,6 +3792,8 @@ unsigned Opposite = inverseMinMax(Opc); SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); + if (Res.getOpcode() != Opposite) + return SDValue(); // Op got folded away. if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; @@ -3796,6 +3804,8 @@ Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags()); SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); + if (Res.getOpcode() != AMDGPUISD::FMED3) + return SDValue(); // Op got folded away. 
if (!N0.hasOneUse()) DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); return Res; Index: test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- test/CodeGen/AMDGPU/fneg-combines.ll +++ test/CodeGen/AMDGPU/fneg-combines.ll @@ -214,6 +214,23 @@ ret void } +; This test used to assert with -enable-no-signed-zeros-fp-math +; GCN-LABEL: {{^}}fneg_fadd_0: + +define amdgpu_ps float @fneg_fadd_0(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) local_unnamed_addr #0 { +.entry: + %tmp7 = fdiv float 1.000000e+00, %tmp6 + %tmp8 = fmul float 0.000000e+00, %tmp7 + %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 + %.i188 = fadd float %tmp9, 0.000000e+00 + %tmp10 = fcmp uge float %.i188, %tmp2 + %tmp11 = fsub float -0.000000e+00, %.i188 + %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 + %tmp12 = fcmp ule float %.i092, 0.000000e+00 + %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 + ret float %.i198 +} + ; -------------------------------------------------------------------------------- ; fmul tests ; --------------------------------------------------------------------------------