Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14004,10 +14004,13 @@ if (N1CFP && N1CFP->isExactlyValue(+2.0)) return DAG.getNode(ISD::FADD, DL, VT, N0, N0); - // fold (fmul X, -1.0) -> (fneg X) - if (N1CFP && N1CFP->isExactlyValue(-1.0)) - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N0); + // fold (fmul X, -1.0) -> (fsub -0.0, X) + if (N1CFP && N1CFP->isExactlyValue(-1.0)) { + if (!LegalOperations || TLI.isOperationLegal(ISD::FSUB, VT)) { + return DAG.getNode(ISD::FSUB, DL, VT, + DAG.getConstantFP(-0.0, DL, VT), N0, Flags); + } + } // -N0 * -N1 --> N0 * N1 TargetLowering::NegatibleCost CostN0 = Index: llvm/test/CodeGen/AArch64/arm64-fmadd.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fmadd.ll +++ llvm/test/CodeGen/AArch64/arm64-fmadd.ll @@ -82,7 +82,7 @@ ; CHECK-NEXT: fmsub d0, d0, d1, d2 ; CHECK-NEXT: ret entry: - %mul = fmul double %b, -1.000000e+00 + %mul = fneg double %b %0 = tail call double @llvm.fma.f64(double %a, double %mul, double %c) ret double %0 } @@ -93,7 +93,7 @@ ; CHECK-NEXT: fmsub d0, d1, d0, d2 ; CHECK-NEXT: ret entry: - %mul = fmul double %b, -1.000000e+00 + %mul = fneg double %b %0 = tail call double @llvm.fma.f64(double %mul, double %a, double %c) ret double %0 } @@ -104,7 +104,7 @@ ; CHECK-NEXT: fnmsub d0, d0, d1, d2 ; CHECK-NEXT: ret entry: - %mul = fmul double %c, -1.000000e+00 + %mul = fneg double %c %0 = tail call double @llvm.fma.f64(double %a, double %b, double %mul) ret double %0 } Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll =================================================================== --- llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll +++ llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll @@ -22,7 +22,7 @@ ; 
CHECK-LABEL: fms16: ; CHECK: fmsub h0, h0, h1, h2 entry: - %mul = fmul half %b, -1.000000e+00 + %mul = fneg half %b %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c) ret half %0 } @@ -32,7 +32,7 @@ ; CHECK: fmsub h0, h1, h0, h2 ; CHECK-NEXT: ret entry: - %mul = fmul half %b, -1.000000e+00 + %mul = fneg half %b %0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c) ret half %0 } @@ -42,7 +42,7 @@ ; CHECK: fnmsub h0, h0, h1, h2 ; CHECK-NEXT: ret entry: - %mul = fmul half %c, -1.000000e+00 + %mul = fneg half %c %0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul) ret half %0 } Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -2597,6 +2597,51 @@ ret <2 x float> %i6 } +; This expects denormal flushing, so can't turn this fmul into fneg +; TODO: Keeping this as fmul saves encoding size +; GCN-LABEL: {{^}}nnan_fmul_neg1_to_fneg: +; GCN: v_sub_f32_e32 [[TMP:v[0-9]+]], 0x80000000, v0 +; GCN-NEXT: v_mul_f32_e32 v0, [[TMP]], v1 +define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 { + %mul = fmul float %x, -1.0 + %add = fmul nnan float %mul, %y + ret float %add +} + +; It's legal to turn this fmul into an fneg since denormals are +; preserved and we know an snan can't happen from the flag. 
+; GCN-LABEL: {{^}}denormal_fmul_neg1_to_fneg: +; GCN: v_mul_f32_e64 v0, -v0, v1 +; GCN-NEXT: s_setpc_b64 +define float @denormal_fmul_neg1_to_fneg(float %x, float %y) { + %mul = fmul nnan float %x, -1.0 + %add = fmul float %mul, %y + ret float %add +} + +; We know the source can't be an snan +; GCN-LABEL: {{^}}denorm_snan_fmul_neg1_to_fneg: +; GCN: v_mul_f32_e64 [[TMP:v[0-9]+]], v0, -v0 +; GCN: v_mul_f32_e32 v0, [[TMP]], v1 +; GCN-NEXT: s_setpc_b64 +define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) { + %canonical = fmul float %x, %x + %mul = fmul float %canonical, -1.0 + %add = fmul float %mul, %y + ret float %add +} + +; GCN-LABEL: {{^}}flush_snan_fmul_neg1_to_fneg: +; GCN: v_mul_f32_e32 [[TMP0:v[0-9]+]], 1.0, v0 +; GCN: v_sub_f32_e32 [[TMP1:v[0-9]+]], 0x80000000, [[TMP0]] +; GCN-NEXT: v_mul_f32_e32 v0, [[TMP1]], v1 +define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 { + %quiet = call float @llvm.canonicalize.f32(float %x) + %mul = fmul float %quiet, -1.0 + %add = fmul float %mul, %y + ret float %add +} + declare i32 @llvm.amdgcn.workitem.id.x() #1 declare float @llvm.fma.f32(float, float, float) #1 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>) Index: llvm/test/CodeGen/ARM/fnegs.ll =================================================================== --- llvm/test/CodeGen/ARM/fnegs.ll +++ llvm/test/CodeGen/ARM/fnegs.ll @@ -49,7 +49,7 @@ define float @test2(float* %a) { entry: %0 = load float, float* %a, align 4 ; [#uses=2] - %1 = fmul float -1.000000e+00, %0 ; [#uses=2] + %1 = fneg float %0 ; [#uses=2] %2 = fpext float %1 to double ; [#uses=1] %3 = fcmp olt double %2, 1.234000e+00 ; [#uses=1] %retval = select i1 %3, float %1, float %0 ; [#uses=1] Index: llvm/test/CodeGen/Hexagon/opt-fneg.ll =================================================================== --- llvm/test/CodeGen/Hexagon/opt-fneg.ll +++ llvm/test/CodeGen/Hexagon/opt-fneg.ll @@ -3,6 +3,7 @@ define float @foo(float %x) nounwind { entry: +; 
CHECK-LABEL: foo: ; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31) %x.addr = alloca float, align 4 store float %x, float* %x.addr, align 4 @@ -13,14 +14,25 @@ define float @bar(float %x) nounwind { entry: +; CHECK-LABEL: bar: ; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31) %sub = fsub float -0.000000e+00, %x ret float %sub } -define float @baz(float %x) nounwind { +define float @baz0(float %x) nounwind { entry: +; CHECK-LABEL: baz0: ; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31) - %conv1 = fmul float %x, -1.000000e+00 + %conv1 = fmul nnan float %x, -1.000000e+00 + ret float %conv1 +} + +define float @baz1(float %x) nounwind { +entry: + %not.nan = fadd nnan float %x, %x +; CHECK-LABEL: baz1: +; CHECK: r{{[0-9]+}} = togglebit(r{{[0-9]+}},#31) + %conv1 = fmul float %not.nan, -1.000000e+00 ret float %conv1 } Index: llvm/test/CodeGen/PowerPC/combine-fneg.ll =================================================================== --- llvm/test/CodeGen/PowerPC/combine-fneg.ll +++ llvm/test/CodeGen/PowerPC/combine-fneg.ll @@ -13,10 +13,10 @@ ; CHECK-NEXT: xvredp 2, 0 ; CHECK-NEXT: xxswapd 1, 1 ; CHECK-NEXT: xxlor 3, 1, 1 -; CHECK-NEXT: xvnmsubadp 3, 0, 2 -; CHECK-NEXT: xvmaddadp 2, 2, 3 -; CHECK-NEXT: xvnmsubadp 1, 0, 2 -; CHECK-NEXT: xvnmaddadp 2, 2, 1 +; CHECK-NEXT: xvmaddadp 3, 0, 2 +; CHECK-NEXT: xvnmsubadp 2, 2, 3 +; CHECK-NEXT: xvmaddadp 1, 0, 2 +; CHECK-NEXT: xvmsubadp 2, 2, 1 ; CHECK-NEXT: xvmuldp 34, 34, 2 ; CHECK-NEXT: xvmuldp 35, 35, 2 ; CHECK-NEXT: blr