diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13367,18 +13367,21 @@
   }
 
   // (fsub -0.0, N1) -> -N1
-  // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
-  //       FSUB does not specify the sign bit of a NaN. Also note that for
-  //       the same reason, the inverse transform is not safe, unless fast math
-  //       flags are in play.
   if (N0CFP && N0CFP->isZero()) {
     if (N0CFP->isNegative() ||
         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
-      if (SDValue NegN1 =
-              TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
-        return NegN1;
-      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
-        return DAG.getNode(ISD::FNEG, DL, VT, N1);
+      // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are
+      // flushed to zero, unless all users treat denorms as zero (DAZ).
+      // FIXME: This transform will change the sign of a NaN and the behavior
+      // of a signaling NaN. It is only valid when a NoNaN flag is present.
+      DenormalMode DenormMode = DAG.getDenormalMode(VT);
+      if (DenormMode == DenormalMode::getIEEE()) {
+        if (SDValue NegN1 =
+                TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize))
+          return NegN1;
+        if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
+          return DAG.getNode(ISD::FNEG, DL, VT, N1);
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
--- a/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp-modifier.ll
@@ -62,7 +62,7 @@
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %a = load float, float addrspace(1)* %gep0
   %floor = call float @llvm.floor.f32(float %a)
-  %neg.floor = fsub float -0.0, %floor
+  %neg.floor = fneg float %floor
   %max = call float @llvm.maxnum.f32(float %neg.floor, float 0.0)
   %clamp = call float @llvm.minnum.f32(float %max, float 1.0)
   store float %clamp, float addrspace(1)* %out.gep
diff --git a/llvm/test/CodeGen/AMDGPU/clamp.ll b/llvm/test/CodeGen/AMDGPU/clamp.ll
--- a/llvm/test/CodeGen/AMDGPU/clamp.ll
+++ b/llvm/test/CodeGen/AMDGPU/clamp.ll
@@ -25,7 +25,7 @@
   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %a = load float, float addrspace(1)* %gep0
-  %fneg.a = fsub float -0.0, %a
+  %fneg.a = fneg float %a
   %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
@@ -42,7 +42,7 @@
   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
   %a = load float, float addrspace(1)* %gep0
   %fabs.a = call float @llvm.fabs.f32(float %a)
-  %fneg.fabs.a = fsub float -0.0, %fabs.a
+  %fneg.fabs.a = fneg float %fabs.a
   %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 
diff --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -56,7 +56,7 @@
 ; GCN: global_store_dword v{{[0-9]+}}, [[OUT]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
   %load = load float, float addrspace(1)* %arg, align 4
-  %neg = fsub float -0.000000e+00, %load
+  %neg = fneg float %load
   %div = fdiv float 1.000000e+00, %neg, !fpmath !0
   store float %div, float addrspace(1)* %arg, align 4
   ret void
@@ -188,7 +188,7 @@
 ; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
   %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
-  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
+  %neg = fneg <4 x float> %load
   %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
   store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
   ret void
@@ -226,7 +226,7 @@
 ; GCN-FLUSH: global_store_dwordx4 v{{[0-9]+}}, v{{\[}}[[OUT0]]:[[OUT3]]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
   %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
-  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
+  %neg = fneg <4 x float> %load
   %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
   store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
   ret void
@@ -372,7 +372,7 @@
 ; GCN: global_store_dword v{{[0-9]+}}, [[RCP]], s{{\[[0-9]+:[0-9]+\]}}
 define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
   %load = load float, float addrspace(1)* %arg, align 4
-  %neg = fsub float -0.000000e+00, %load, !fpmath !0
+  %neg = fneg float %load, !fpmath !0
   %div = fdiv fast float 1.000000e+00, %neg
   store float %div, float addrspace(1)* %arg, align 4
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/fma-combine.ll b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
--- a/llvm/test/CodeGen/AMDGPU/fma-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/fma-combine.ll
@@ -647,7 +647,7 @@
   %r1 = load volatile float, float addrspace(1)* %gep.0
   %r2 = load volatile float, float addrspace(1)* %gep.1
 
-  %r1.fneg = fsub float -0.000000e+00, %r1
+  %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fma.f32(float -2.0, float %r1.fneg, float %r2)
   store float %r3, float addrspace(1)* %gep.out
@@ -669,7 +669,7 @@
   %r1 = load volatile float, float addrspace(1)* %gep.0
   %r2 = load volatile float, float addrspace(1)* %gep.1
 
-  %r1.fneg = fsub float -0.000000e+00, %r1
+  %r1.fneg = fneg float %r1
 
   %r3 = tail call float @llvm.fma.f32(float 2.0, float %r1.fneg, float %r2)
   store float %r3, float addrspace(1)* %gep.out
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.ll
@@ -482,7 +482,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %min = call float @llvm.minnum.f32(float %a, float %a)
-  %min.fneg = fsub float -0.0, %min
+  %min.fneg = fneg float %min
   store float %min.fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -493,7 +493,7 @@
 ; GCN-NEXT: ; return
 define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 {
   %min = call float @llvm.minnum.f32(float %a, float %a)
-  %min.fneg = fsub float -0.0, %min
+  %min.fneg = fneg float %min
   ret float %min.fneg
 }
 
@@ -887,7 +887,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %max = call float @llvm.maxnum.f32(float %a, float %a)
-  %max.fneg = fsub float -0.0, %max
+  %max.fneg = fneg float %max
   store float %max.fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -898,7 +898,7 @@
 ; GCN-NEXT: ; return
 define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 {
   %max = call float @llvm.maxnum.f32(float %a, float %a)
-  %max.fneg = fsub float -0.0, %max
+  %max.fneg = fneg float %max
   ret float %max.fneg
 }
 
@@ -2039,7 +2039,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %sin = call float @llvm.amdgcn.sin.f32(float %a)
-  %fneg = fsub float -0.0, %sin
+  %fneg = fneg float %sin
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2059,7 +2059,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %trunc = call float @llvm.trunc.f32(float %a)
-  %fneg = fsub float -0.0, %trunc
+  %fneg = fneg float %trunc
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2086,7 +2086,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %round = call float @llvm.round.f32(float %a)
-  %fneg = fsub float -0.0, %round
+  %fneg = fneg float %round
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2106,7 +2106,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %rint = call float @llvm.rint.f32(float %a)
-  %fneg = fsub float -0.0, %rint
+  %fneg = fneg float %rint
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2126,7 +2126,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %nearbyint = call float @llvm.nearbyint.f32(float %a)
-  %fneg = fsub float -0.0, %nearbyint
+  %fneg = fneg float %nearbyint
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2146,7 +2146,7 @@
   %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
   %a = load volatile float, float addrspace(1)* %a.gep
   %trunc = call float @llvm.canonicalize.f32(float %a)
-  %fneg = fsub float -0.0, %trunc
+  %fneg = fneg float %trunc
   store float %fneg, float addrspace(1)* %out.gep
   ret void
 }
@@ -2170,7 +2170,7 @@
   %a = load volatile float, float addrspace(1)* %a.gep
   %b = load volatile float, float addrspace(1)* %b.gep
   %mul = fmul float %a, %b
-  %fneg = fsub float -0.0, %mul
+  %fneg = fneg float %mul
   %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
   %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
   store volatile float %intrp0, float addrspace(1)* %out.gep
@@ -2193,7 +2193,7 @@
   %a = load volatile float, float addrspace(1)* %a.gep
   %b = load volatile float, float addrspace(1)* %b.gep
   %mul = fmul float %a, %b
-  %fneg = fsub float -0.0, %mul
+  %fneg = fneg float %mul
   %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
   %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
   store volatile float %intrp0, float addrspace(1)* %out.gep
@@ -2230,7 +2230,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
   %mul = fmul float %a, %b
-  %fneg = fsub float -0.0, %mul
+  %fneg = fneg float %mul
   %cmp0 = icmp eq i32 %d, 0
   br i1 %cmp0, label %if, label %endif
 
@@ -2266,7 +2266,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
   %mul = fmul float %a, %b
-  %fneg = fsub float -0.0, %mul
+  %fneg = fneg float %mul
   call void asm sideeffect "; use $0", "v"(float %fneg) #0
   store volatile float %fneg, float addrspace(1)* %out.gep
   ret void
@@ -2295,7 +2295,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
   %mul = fmul float %a, %b
-  %fneg = fsub float -0.0, %mul
+  %fneg = fneg float %mul
   call void asm sideeffect "; use $0", "v"(float %fneg) #0
   store volatile float %mul, float addrspace(1)* %out.gep
   ret void
@@ -2328,7 +2328,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
 
-  %fneg.a = fsub float -0.0, %a
+  %fneg.a = fneg float %a
   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
   %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
 
@@ -2360,7 +2360,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
 
-  %fneg.a = fsub float -0.0, %a
+  %fneg.a = fneg float %a
   %mul0 = fmul float %fneg.a, %b
   %mul1 = fmul float %fneg.a, %c
 
@@ -2391,7 +2391,7 @@
   %b = load volatile float, float addrspace(1)* %b.gep
   %c = load volatile float, float addrspace(1)* %c.gep
 
-  %fneg.a = fsub float -0.0, %a
+  %fneg.a = fneg float %a
   %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
   %mul1 = fmul float %fneg.a, %c
 
@@ -2433,7 +2433,7 @@
   %d = load volatile float, float addrspace(1)* %d.gep
 
   %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
-  %fneg.fma0 = fsub float -0.0, %fma0
+  %fneg.fma0 = fneg float %fma0
   %mul1 = fmul float %fneg.fma0, %c
   %mul2 = fmul float %fneg.fma0, %d
 
@@ -2501,7 +2501,7 @@
   %d = load volatile float, float addrspace(1)* %d.gep
 
   %trunc.a = call float @llvm.trunc.f32(float %a)
-  %trunc.fneg.a = fsub float -0.0, %trunc.a
+  %trunc.fneg.a = fneg float %trunc.a
   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
   store volatile float %fma0, float addrspace(1)* %out
   ret void
@@ -2531,7 +2531,7 @@
   %d = load volatile float, float addrspace(1)* %d.gep
 
   %trunc.a = call float @llvm.trunc.f32(float %a)
-  %trunc.fneg.a = fsub float -0.0, %trunc.a
+  %trunc.fneg.a = fneg float %trunc.a
   %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
   %mul1 = fmul float %trunc.a, %d
   store volatile float %fma0, float addrspace(1)* %out
diff --git a/llvm/test/CodeGen/AMDGPU/fpext-free.ll b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
--- a/llvm/test/CodeGen/AMDGPU/fpext-free.ll
+++ b/llvm/test/CodeGen/AMDGPU/fpext-free.ll
@@ -288,7 +288,7 @@
 entry:
   %mul = fmul half %x, %y
   %mul.ext = fpext half %mul to float
-  %neg.mul.ext = fsub float -0.0, %mul.ext
+  %neg.mul.ext = fneg float %mul.ext
   %add = fsub float %neg.mul.ext, %z
   ret float %add
 }
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -99,7 +99,7 @@
     float addrspace(1)* %a) {
 entry:
   %a.val = load float, float addrspace(1)* %a
-  %a.fneg = fsub float -0.0, %a.val
+  %a.fneg = fneg float %a.val
   %r.val = fptrunc float %a.fneg to half
   store half %r.val, half addrspace(1)* %r
   ret void
@@ -132,7 +132,7 @@
 entry:
   %a.val = load float, float addrspace(1)* %a
   %a.fabs = call float @llvm.fabs.f32(float %a.val)
-  %a.fneg.fabs = fsub float -0.0, %a.fabs
+  %a.fneg.fabs = fneg float %a.fabs
   %r.val = fptrunc float %a.fneg.fabs to half
   store half %r.val, half addrspace(1)* %r
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
--- a/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
+++ b/llvm/test/CodeGen/AMDGPU/known-never-snan.ll
@@ -26,7 +26,7 @@
 ; GCN-NEXT: v_med3_f32 v0, -v0, 2.0, 4.0
 ; GCN-NEXT: s_setpc_b64 s[30:31]
   %a.nnan.add = fdiv nnan float 1.0, %a, !fpmath !0
-  %known.not.snan = fsub float -0.0, %a.nnan.add
+  %known.not.snan = fneg float %a.nnan.add
   %max = call float @llvm.maxnum.f32(float %known.not.snan, float 2.0)
   %med = call float @llvm.minnum.f32(float %max, float 4.0)
   ret float %med
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.ll
@@ -74,7 +74,7 @@
   %a.val = load float, float addrspace(1)* %a
   %b.val = load float, float addrspace(1)* %b
   %c.val = load float, float addrspace(1)* %c
-  %neg.b = fsub float -0.0, %b.val
+  %neg.b = fneg float %b.val
   %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.b, float %c.val)
   store float %r.val, float addrspace(1)* %r
   ret void
@@ -107,7 +107,7 @@
   %b.val = load float, float addrspace(1)* %b
   %c.val = load float, float addrspace(1)* %c
   %abs.b = call float @llvm.fabs.f32(float %b.val)
-  %neg.abs.b = fsub float -0.0, %abs.b
+  %neg.abs.b = fneg float %abs.b
   %r.val = call float @llvm.amdgcn.fmad.ftz.f32(float %a.val, float %neg.abs.b, float %c.val)
   store float %r.val, float addrspace(1)* %r
   ret void
diff --git a/llvm/test/CodeGen/AMDGPU/mad-combine.ll b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
--- a/llvm/test/CodeGen/AMDGPU/mad-combine.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-combine.ll
@@ -296,7 +296,7 @@
   %c = load volatile float, float addrspace(1)* %gep.2
 
   %mul = fmul float %a, %b
-  %mul.neg = fsub float -0.0, %mul
+  %mul.neg = fneg float %mul
   %fma = fsub float %mul.neg, %c
 
   store float %fma, float addrspace(1)* %gep.out
@@ -338,7 +338,7 @@
   %d = load volatile float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
-  %mul.neg = fsub float -0.0, %mul
+  %mul.neg = fneg float %mul
   %fma0 = fsub float %mul.neg, %c
   %fma1 = fsub float %mul.neg, %d
 
@@ -382,7 +382,7 @@
   %d = load volatile float, float addrspace(1)* %gep.3
 
   %mul = fmul float %a, %b
-  %mul.neg = fsub float -0.0, %mul
+  %mul.neg = fneg float %mul
   %fma0 = fsub float %mul.neg, %c
   %fma1 = fsub float %mul, %d
 
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -109,7 +109,7 @@
   %src0.ext = fpext half %src0 to float
   %src1.ext = fpext half %src1 to float
   %src2.ext = fpext half %src2 to float
-  %src0.ext.neg = fsub float -0.0, %src0.ext
+  %src0.ext.neg = fneg float %src0.ext
   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg, float %src1.ext, float %src2.ext)
   ret float %result
 }
@@ -143,7 +143,7 @@
   %src1.ext = fpext half %src1 to float
   %src2.ext = fpext half %src2 to float
   %src0.ext.abs = call float @llvm.fabs.f32(float %src0.ext)
-  %src0.ext.neg.abs = fsub float -0.0, %src0.ext.abs
+  %src0.ext.neg.abs = fneg float %src0.ext.abs
   %result = tail call float @llvm.fmuladd.f32(float %src0.ext.neg.abs, float %src1.ext, float %src2.ext)
   ret float %result
 }
@@ -172,7 +172,7 @@
 define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %src2) #0 {
   %src0.ext = fpext half %src0 to float
   %src1.ext = fpext half %src1 to float
-  %src2.neg = fsub float -0.0, %src2
+  %src2.neg = fneg float %src2
   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg)
   ret float %result
 }
@@ -203,7 +203,7 @@
   %src0.ext = fpext half %src0 to float
   %src1.ext = fpext half %src1 to float
   %src2.abs = call float @llvm.fabs.f32(float %src2)
-  %src2.neg.abs = fsub float -0.0, %src2.abs
+  %src2.neg.abs = fneg float %src2.abs
   %result = tail call float @llvm.fmuladd.f32(float %src0.ext, float %src1.ext, float %src2.neg.abs)
   ret float %result
 }
diff --git a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
--- a/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
+++ b/llvm/test/CodeGen/AMDGPU/rcp-pattern.ll
@@ -94,7 +94,7 @@
 ; GCN: buffer_store_dword [[RCP]]
 define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
   %src.fabs = call float @llvm.fabs.f32(float %src)
-  %src.fabs.fneg = fsub float -0.0, %src.fabs
+  %src.fabs.fneg = fneg float %src.fabs
   %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0
   store float %rcp, float addrspace(1)* %out, align 4
   ret void
@@ -108,7 +108,7 @@
 ; GCN: buffer_store_dword [[MUL]]
 define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
   %src.fabs = call float @llvm.fabs.f32(float %src)
-  %src.fabs.fneg = fsub float -0.0, %src.fabs
+  %src.fabs.fneg = fneg float %src.fabs
   %rcp = fdiv float 1.0, %src.fabs.fneg, !fpmath !0
   store volatile float %rcp, float addrspace(1)* %out, align 4
 
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.ll b/llvm/test/CodeGen/AMDGPU/rsq.ll
--- a/llvm/test/CodeGen/AMDGPU/rsq.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.ll
@@ -116,7 +116,7 @@
 ; SI-UNSAFE: buffer_store_dword [[RSQ]]
 define amdgpu_kernel void @neg_rsq_neg_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in) #0 {
   %val = load float, float addrspace(1)* %in, align 4
-  %val.fneg = fsub float -0.0, %val
+  %val.fneg = fneg float %val
   %sqrt = call float @llvm.sqrt.f32(float %val.fneg)
   %div = fdiv float -1.0, %sqrt, !fpmath !0
   store float %div, float addrspace(1)* %out, align 4
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -105,7 +105,7 @@
   %b = load float, float addrspace(1)* %b_ptr
   %c = load float, float addrspace(1)* %c_ptr
 
-  %neg_a = fsub float -0.0, %a
+  %neg_a = fneg float %a
   %tmp0 = fmul float %neg_a, %b
   %tmp1 = fadd float %tmp0, %c
 
@@ -165,7 +165,7 @@
   %b = load float, float addrspace(1)* %b_ptr
   %c = load float, float addrspace(1)* %c_ptr
 
-  %neg_b = fsub float -0.0, %b
+  %neg_b = fneg float %b
   %tmp0 = fmul float %a, %neg_b
   %tmp1 = fadd float %tmp0, %c
 
@@ -205,7 +205,7 @@
   %b = load float, float addrspace(1)* %b_ptr
   %c = load float, float addrspace(1)* %c_ptr
 
-  %neg_c = fsub float -0.0, %c
+  %neg_c = fneg float %c
   %tmp0 = fmul float %a, %b
   %tmp1 = fadd float %tmp0, %neg_c
 
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
--- a/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac_f16.ll
@@ -83,7 +83,7 @@
   %b.val = load half, half addrspace(1)* %b
   %c.val = load half, half addrspace(1)* %c
 
-  %a.neg = fsub half -0.0, %a.val
+  %a.neg = fneg half %a.val
   %t.val = fmul half %a.neg, %b.val
   %r.val = fadd half %t.val, %c.val
 
@@ -110,7 +110,7 @@
   %b.val = load half, half addrspace(1)* %b
   %c.val = load half, half addrspace(1)* %c
 
-  %b.neg = fsub half -0.0, %b.val
+  %b.neg = fneg half %b.val
   %t.val = fmul half %a.val, %b.neg
   %r.val = fadd half %t.val, %c.val
 
@@ -137,7 +137,7 @@
   %b.val = load half, half addrspace(1)* %b
   %c.val = load half, half addrspace(1)* %c
 
-  %c.neg = fsub half -0.0, %c.val
+  %c.neg = fneg half %c.val
   %t.val = fmul half %a.val, %b.val
   %r.val = fadd half %t.val, %c.neg
 
@@ -410,7 +410,7 @@
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
 
-  %a.neg = fsub <2 x half> <half -0.0, half -0.0>, %a.val
+  %a.neg = fneg <2 x half> %a.val
   %t.val = fmul <2 x half> %a.neg, %b.val
   %r.val = fadd <2 x half> %t.val, %c.val
 
@@ -439,7 +439,7 @@
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
 
-  %b.neg = fsub <2 x half> <half -0.0, half -0.0>, %b.val
+  %b.neg = fneg <2 x half> %b.val
   %t.val = fmul <2 x half> %a.val, %b.neg
   %r.val = fadd <2 x half> %t.val, %c.val
 
@@ -472,7 +472,7 @@
   %b.val = load <2 x half>, <2 x half> addrspace(1)* %b
   %c.val = load <2 x half>, <2 x half> addrspace(1)* %c
 
-  %c.neg = fsub <2 x half> <half -0.0, half -0.0>, %c.val
+  %c.neg = fneg <2 x half> %c.val
   %t.val = fmul <2 x half> %a.val, %b.val
   %r.val = fadd <2 x half> %t.val, %c.neg