Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12581,18 +12581,19 @@ } // (fsub -0.0, N1) -> -N1 - // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the - // FSUB does not specify the sign bit of a NaN. Also note that for - // the same reason, the inverse transform is not safe, unless fast math - // flags are in play. if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) != - TargetLowering::NegatibleCost::Expensive) - return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are + // flushed to zero. + DenormalMode DenormMode = DAG.getDenormalMode(VT); + if (DenormMode == DenormalMode::getIEEE()) { + if (TLI.getNegatibleCost(N1, DAG, LegalOperations, ForCodeSize) != + TargetLowering::NegatibleCost::Expensive) + return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize); + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + } } } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -694,7 +694,7 @@ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } void visitSub(const User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(const User &I); + void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } void visitMul(const User &I) { visitBinary(I, ISD::MUL); } void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } void visitURem(const User &I) { visitBinary(I, ISD::UREM); } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2984,20 +2984,6 @@ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa<ConstantFP>(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; Index: llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll @@ -311,7 +311,7 @@ %id = tail call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %load = load float, float addrspace(1)* %gep, align 4 - %v = fsub float -0.0, %load + %v = fneg float %load %canonicalized = tail call float @llvm.canonicalize.f32(float %v) store float
%canonicalized, float addrspace(1)* %gep, align 4 ret void @@ -327,7 +327,7 @@ %gep = getelementptr inbounds float, float addrspace(1)* %arg, i32 %id %load = load float, float addrspace(1)* %gep, align 4 %v0 = fadd float %load, 0.0 - %v = fsub float -0.0, %v0 + %v = fneg float %v0 %canonicalized = tail call float @llvm.canonicalize.f32(float %v) store float %canonicalized, float addrspace(1)* %gep, align 4 ret void Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.f16.ll @@ -77,7 +77,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #1 { %val = load half, half addrspace(1)* %out %val.fabs = call half @llvm.fabs.f16(half %val) - %val.fabs.fneg = fsub half -0.0, %val.fabs + %val.fabs.fneg = fneg half %val.fabs %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -91,7 +91,7 @@ ; CI: v_mul_f32_e32 {{v[0-9]+}}, 1.0, {{v[0-9]+}} define amdgpu_kernel void @v_test_canonicalize_fneg_var_f16(half addrspace(1)* %out) #1 { %val = load half, half addrspace(1)* %out - %val.fneg = fsub half -0.0, %val + %val.fneg = fneg half %val %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -102,7 +102,7 @@ ; GFX89: {{flat|global}}_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REG]] define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_var_f16(half addrspace(1)* %out) #2 { %val = load half, half addrspace(1)* %out - %val.fneg = fsub half -0.0, %val + %val.fneg = fneg half %val %canonicalized = call half @llvm.canonicalize.f16(half %val.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -117,7 +117,7 @@ define amdgpu_kernel void @v_test_no_denormals_canonicalize_fneg_fabs_var_f16(half addrspace(1)* %out) #2 { %val = load half, half addrspace(1)* %out %val.fabs = call half @llvm.fabs.f16(half %val) - %val.fabs.fneg = fsub half -0.0, %val.fabs + %val.fabs.fneg = fneg half %val.fabs %canonicalized = call half @llvm.canonicalize.f16(half %val.fabs.fneg) store half %canonicalized, half addrspace(1)* %out ret void @@ -320,7 +320,7 @@ %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid %val = load <2 x half>, <2 x half> addrspace(1)* %gep %val.fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val) - %val.fabs.fneg = fsub <2 x half> <half -0.0, half -0.0>, %val.fabs + %val.fabs.fneg = fneg <2 x half> %val.fabs %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %val.fabs.fneg) store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out ret void @@ -337,7 +337,7 @@ %tid = call i32 @llvm.amdgcn.workitem.id.x() %gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid %val = load <2 x half>, <2 x half> addrspace(1)* %gep - %fneg.val = fsub <2 x half> <half -0.0, half -0.0>, %val + %fneg.val = fneg <2 x half> %val %canonicalized = call <2 x half> @llvm.canonicalize.v2f16(<2 x half> %fneg.val) store <2 x half> %canonicalized, <2 x half> addrspace(1)* %out ret void Index: llvm/test/CodeGen/AMDGPU/fcanonicalize.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fcanonicalize.ll +++ llvm/test/CodeGen/AMDGPU/fcanonicalize.ll @@ -51,7 +51,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f32(float addrspace(1)* %out) #1 { %val = load float, float addrspace(1)* %out
%val.fabs = call float @llvm.fabs.f32(float %val) - %val.fabs.fneg = fsub float -0.0, %val.fabs + %val.fabs.fneg = fneg float %val.fabs %canonicalized = call float @llvm.canonicalize.f32(float %val.fabs.fneg) store float %canonicalized, float addrspace(1)* %out ret void @@ -62,7 +62,7 @@ ; GCN: buffer_store_dword [[REG]] define amdgpu_kernel void @v_test_canonicalize_fneg_var_f32(float addrspace(1)* %out) #1 { %val = load float, float addrspace(1)* %out - %val.fneg = fsub float -0.0, %val + %val.fneg = fneg float %val %canonicalized = call float @llvm.canonicalize.f32(float %val.fneg) store float %canonicalized, float addrspace(1)* %out ret void @@ -257,7 +257,7 @@ define amdgpu_kernel void @v_test_canonicalize_fneg_fabs_var_f64(double addrspace(1)* %out) #1 { %val = load double, double addrspace(1)* %out %val.fabs = call double @llvm.fabs.f64(double %val) - %val.fabs.fneg = fsub double -0.0, %val.fabs + %val.fabs.fneg = fneg double %val.fabs %canonicalized = call double @llvm.canonicalize.f64(double %val.fabs.fneg) store double %canonicalized, double addrspace(1)* %out ret void @@ -268,7 +268,7 @@ ; GCN: buffer_store_dwordx2 [[REG]] define amdgpu_kernel void @v_test_canonicalize_fneg_var_f64(double addrspace(1)* %out) #1 { %val = load double, double addrspace(1)* %out - %val.fneg = fsub double -0.0, %val + %val.fneg = fneg double %val %canonicalized = call double @llvm.canonicalize.f64(double %val.fneg) store double %canonicalized, double addrspace(1)* %out ret void Index: llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll +++ llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll @@ -56,7 +56,7 @@ ; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load + %neg = fneg float %load %div = fdiv float 1.000000e+00, %neg, !fpmath !0 store float %div, float addrspace(1)* %arg, align 4 ret void @@ -77,7 +77,7 @@ ; GCN: global_store_dword v[{{[0-9:]+}}], [[OUT]], off define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load + %neg = fneg float %load %div = fdiv float -1.000000e+00, %neg, !fpmath !0 store float %div, float addrspace(1)* %arg, align 4 ret void @@ -186,7 +186,7 @@ ; GCN-FLUSH: global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) { %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16 - %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load + %neg = fneg <4 x float> %load %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0 store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16 ret void @@ -224,7 +224,7 @@ ; GCN-FLUSH: global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) { %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16 - %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load + %neg = fneg <4 x float> %load %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0 store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16 ret void @@ -311,7 +311,7 @@ ; GCN: global_store_dwordx4 define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) { %load = load <4 x float>, <4 x float>
addrspace(1)* %arg, align 16 - %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load + %neg = fneg <4 x float> %load %div = fdiv <4 x float> , %neg, !fpmath !0 store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16 ret void @@ -370,7 +370,7 @@ ; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load, !fpmath !0 + %neg = fneg float %load, !fpmath !0 %div = fdiv fast float 1.000000e+00, %neg store float %div, float addrspace(1)* %arg, align 4 ret void @@ -382,7 +382,7 @@ ; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load, !fpmath !0 + %neg = fneg float %load, !fpmath !0 %div = fdiv fast float -1.000000e+00, %neg store float %div, float addrspace(1)* %arg, align 4 ret void @@ -422,7 +422,7 @@ ; GCN: v_div_fixup_f32 define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load + %neg = fneg float %load %div = fdiv float 1.000000e+00, %neg store float %div, float addrspace(1)* %arg, align 4 ret void @@ -436,7 +436,7 @@ ; GCN: v_div_fixup_f32 define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) { %load = load float, float addrspace(1)* %arg, align 4 - %neg = fsub float -0.000000e+00, %load + %neg = fneg float %load %div = fdiv float -1.000000e+00, %neg store float %div, float addrspace(1)* %arg, align 4 ret void Index: llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -216,7 +216,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -247,7 +247,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -272,7 +272,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r2.fneg = fsub half -0.000000e+00, %r2 + %r2.fneg = fneg half %r2 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1, half %r2.fneg) store half %r3, half addrspace(1)* %gep.out @@ -454,8 +454,8 @@ %a = load volatile half, half addrspace(1)* %gep0, align 2 %b = load volatile half, half addrspace(1)* %gep1, align 2 %c = load volatile half, half addrspace(1)* %gep2, align 2 - %nega = fsub half -0.000000e+00, %a - %negb = fsub half -0.000000e+00, %b + %nega = fneg half %a + %negb = fneg half %b %mul = fmul half %nega, %negb %sub = fadd half %mul, %c store half %sub, half addrspace(1)* %outgep, align 2 Index: llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -254,7 +254,7 @@ %r1 = load volatile float, float
addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -287,7 +287,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -319,7 +319,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r2.fneg = fsub float -0.000000e+00, %r2 + %r2.fneg = fneg float %r2 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg) store float %r3, float addrspace(1)* %gep.out @@ -497,8 +497,8 @@ %a = load volatile float, float addrspace(1)* %gep0, align 4 %b = load volatile float, float addrspace(1)* %gep1, align 4 %c = load volatile float, float addrspace(1)* %gep2, align 4 - %nega = fsub float -0.000000e+00, %a - %negb = fsub float -0.000000e+00, %b + %nega = fneg float %a + %negb = fneg float %b %mul = fmul float %nega, %negb %sub = fadd float %mul, %c store float %sub, float addrspace(1)* %outgep, align 4 Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -26,7 +26,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store float %fneg, float addrspace(1)* %out.gep ret void } @@ -47,7 +47,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %add, float addrspace(1)* %out ret void @@ -75,7 +75,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %add, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -100,9 +100,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -124,9 +124,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %add = fadd float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -148,10 +148,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = 
load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %add = fadd float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -177,9 +177,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -205,9 +205,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -226,7 +226,7 @@ %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -249,7 +249,7 @@ %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -274,7 +274,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -295,7 +295,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -318,7 +318,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %mul, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -338,9 +338,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ 
-358,9 +358,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = fmul float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -378,10 +378,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = fmul float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -402,9 +402,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -425,9 +425,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -454,7 +454,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -466,7 +466,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -482,7 +482,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min store float %min.fneg, float addrspace(1)* %out.gep ret void } @@ -493,7 +493,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min ret float %min.fneg } @@ -509,7 +509,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -520,7 +520,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 { %min = call float 
@llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -536,7 +536,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -547,7 +547,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -562,7 +562,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -579,7 +579,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -602,7 +602,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -624,7 +624,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -730,7 +730,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -750,7 +750,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -779,7 +779,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -793,7 +793,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b ret float %mul } @@ -816,7 +816,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg 
float %min %use1 = fmul float %min, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -831,7 +831,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -859,7 +859,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -871,7 +871,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -887,7 +887,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max store float %max.fneg, float addrspace(1)* %out.gep ret void } @@ -898,7 +898,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max ret float %max.fneg } @@ -914,7 +914,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -925,7 +925,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -941,7 +941,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -952,7 +952,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -967,7 +967,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -984,7 +984,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void 
} @@ -995,7 +995,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -1015,7 +1015,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -1029,7 +1029,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b ret float %mul } @@ -1052,7 +1052,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1067,7 +1067,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -1099,7 +1099,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1123,7 +1123,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fma, float addrspace(1)* %out ret void @@ -1154,7 +1154,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1182,9 +1182,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1210,9 +1210,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) - %fneg = fsub float 
-0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1238,10 +1238,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1267,10 +1267,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.c = fsub float -0.000000e+00, %c + %fneg.a = fneg float %a + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1296,9 +1296,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.c = fsub float -0.000000e+00, %c + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1330,9 +1330,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1360,9 +1360,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fneg.a, %d store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1394,7 +1394,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1446,7 +1446,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1483,7 +1483,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float 
addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store double %fneg, double addrspace(1)* %out.gep @@ -1502,7 +1502,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store volatile double %fneg, double addrspace(1)* %out.gep @@ -1559,7 +1559,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpext, float addrspace(1)* %out.gep ret void @@ -1573,7 +1573,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext %mul = fmul float %fpext, 4.0 store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %mul, float addrspace(1)* %out.gep @@ -1595,7 +1595,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1612,7 +1612,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1631,7 +1631,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %fneg.a, double addrspace(1)* undef ret void @@ -1652,7 +1652,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround %use1 = fmul double %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %use1, double addrspace(1)* undef @@ -1685,7 +1685,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store half %fneg, half addrspace(1)* %out.gep @@ -1705,7 +1705,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* 
%out.gep store volatile float %fpround, float addrspace(1)* %out.gep ret void @@ -1723,7 +1723,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store volatile half %fneg, half addrspace(1)* %out.gep @@ -1743,7 +1743,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround %use1 = fmul float %fneg.a, %c @@ -1767,7 +1767,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rcp = call float @llvm.amdgcn.rcp.f32(float %a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1782,9 +1782,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1801,9 +1801,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fneg.a, float addrspace(1)* undef ret void @@ -1821,9 +1821,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %use1, float addrspace(1)* undef @@ -1848,7 +1848,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1869,7 +1869,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -1891,7 
+1891,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1911,9 +1911,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1931,9 +1931,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1951,10 +1951,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1974,9 +1974,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1997,9 +1997,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -2023,7 +2023,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.sin.f32(float %a) - %fneg = fsub float -0.000000e+00, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2039,7 +2039,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.amdgcn.sin.f32(float %a) 
- %fneg = fsub float -0.0, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2059,7 +2059,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.trunc.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2086,7 +2086,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %round = call float @llvm.round.f32(float %a) - %fneg = fsub float -0.0, %round + %fneg = fneg float %round store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2106,7 +2106,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rint = call float @llvm.rint.f32(float %a) - %fneg = fsub float -0.0, %rint + %fneg = fneg float %rint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2126,7 +2126,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %nearbyint = call float @llvm.nearbyint.f32(float %a) - %fneg = fsub float -0.0, %nearbyint + %fneg = fneg float %nearbyint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2146,7 +2146,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.canonicalize.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2170,7 +2170,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2193,7 +2193,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2230,7 +2230,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %cmp0 = icmp eq i32 %d, 0 br i1 %cmp0, label %if, label %endif @@ -2266,7 +2266,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %fneg, float addrspace(1)* %out.gep ret void @@ -2295,7 +2295,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %mul, float addrspace(1)* %out.gep 
ret void @@ -2328,7 +2328,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0) @@ -2360,7 +2360,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %mul0 = fmul float %fneg.a, %b %mul1 = fmul float %fneg.a, %c @@ -2391,7 +2391,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0) %mul1 = fmul float %fneg.a, %c @@ -2433,7 +2433,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0) - %fneg.fma0 = fsub float -0.0, %fma0 + %fneg.fma0 = fneg float %fma0 %mul1 = fmul float %fneg.fma0, %c %mul2 = fmul float %fneg.fma0, %d @@ -2501,7 +2501,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) store volatile float %fma0, float addrspace(1)* %out ret void @@ -2531,7 +2531,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) %mul1 = fmul float %trunc.a, %d store volatile float %fma0, float addrspace(1)* %out Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll @@ -12,10 +12,10 @@ ; GCN-LABEL: {{^}}test_fmed3_srcmods: ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, |v{{[0-9]+}}|, -|v{{[0-9]+}}| define amdgpu_kernel void @test_fmed3_srcmods(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { - %src0.fneg = fsub float -0.0, %src0 + %src0.fneg = fneg float %src0 %src1.fabs = call float @llvm.fabs.f32(float %src1) %src2.fabs = call float @llvm.fabs.f32(float %src2) - %src2.fneg.fabs = fsub float -0.0, %src2.fabs + %src2.fneg.fabs = fneg float %src2.fabs %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0.fneg, float %src1.fabs, float %src2.fneg.fabs) store float %med3, float addrspace(1)* %out ret void @@ -25,7 +25,7 @@ ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} define amdgpu_kernel void @test_fneg_fmed3(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 store float %neg.med3, float addrspace(1)* %out ret void } @@ -35,7 +35,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, -4.0, [[MED3]] define amdgpu_kernel void @test_fneg_fmed3_multi_use(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 %med3.user = fmul float %med3, 4.0 store volatile float %med3.user, 
float addrspace(1)* %out store volatile float %neg.med3, float addrspace(1)* %out @@ -57,7 +57,7 @@ ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, -v{{[0-9]+}}, [[NEG0]] define amdgpu_kernel void @test_fneg_fmed3_rr_0(float addrspace(1)* %out, float %src0, float %src1) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 store float %neg.med3, float addrspace(1)* %out ret void } @@ -69,7 +69,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]] define amdgpu_kernel void @test_fneg_fmed3_rr_0_foldable_user(float addrspace(1)* %out, float %src0, float %src1, float %mul.arg) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float 0.0) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 %mul = fmul float %neg.med3, %mul.arg store float %mul, float addrspace(1)* %out ret void @@ -81,7 +81,7 @@ ; GCN: v_med3_f32 v{{[0-9]+}}, -s{{[0-9]+}}, [[NEG_INV]], [[NEG0]] define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0(float addrspace(1)* %out, float %src0) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 store float %neg.med3, float addrspace(1)* %out ret void } @@ -93,7 +93,7 @@ ; GCN: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, [[MED3]] define amdgpu_kernel void @test_fneg_fmed3_r_inv2pi_0_foldable_user(float addrspace(1)* %out, float %src0, float %mul.arg) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float 0x3FC45F3060000000, float 0.0) - %neg.med3 = fsub float -0.0, %med3 + %neg.med3 = fneg float %med3 %mul = fmul float %neg.med3, %mul.arg store float %mul, float addrspace(1)* %out ret void Index: llvm/test/CodeGen/AMDGPU/selectcc-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -11,7 +11,7 @@ entry: %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 @@ -39,7 +39,7 @@ entry: %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 Index: llvm/test/CodeGen/AMDGPU/set-dx10.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/set-dx10.ll +++ llvm/test/CodeGen/AMDGPU/set-dx10.ll @@ -12,7 +12,7 @@ entry: %0 = fcmp une float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -38,7 +38,7 @@ entry: %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -64,7 +64,7 @@ entry: %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -90,7 +90,7 @@ entry: %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub 
float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -116,7 +116,7 @@ entry: %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -142,7 +142,7 @@ entry: %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void Index: llvm/test/CodeGen/X86/fp-denormals.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/fp-denormals.ll @@ -0,0 +1,53 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s + +; +; FSUB(+-0.0, X) -> FNEG(X) +; + +define float @fsub_fneg_default(float %a) #0 { +; CHECK-LABEL: fsub_fneg_default: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq + %1 = fsub float -0.0, %a + ret float %1 +} + +define float @fsub_fneg_ieee(float %a) #1 { +; CHECK-LABEL: fsub_fneg_ieee: +; CHECK: # %bb.0: +; CHECK-NEXT: xorps {{.*}}(%rip), %xmm0 +; CHECK-NEXT: retq + %1 = fsub float -0.0, %a + ret float %1 +} + +define float @fsub_fneg_preserve_sign(float %a) #2 { +; CHECK-LABEL: fsub_fneg_preserve_sign: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq + %1 = fsub float -0.0, %a + ret float %1 +} + +define float @fsub_fneg_positive_zero(float %a) #3 { +; CHECK-LABEL: fsub_fneg_positive_zero: +; CHECK: # %bb.0: +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: subss %xmm0, %xmm1 +; CHECK-NEXT: movaps %xmm1, %xmm0 +; CHECK-NEXT: retq + %1 = fsub float -0.0, %a + ret float %1 +} + +attributes #0 = { "denormal-fp-math"="invalid" } +attributes #1 = { "denormal-fp-math"="ieee" } +attributes #2 = { "denormal-fp-math"="preserve-sign" } +attributes #3 = { "denormal-fp-math"="positive-zero" }
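
; ---------------------------------------------------------------------------
; Illustrative sketch only, not part of the patch: the pair of functions below
; shows the semantic difference the new DenormalMode guard in DAGCombiner is
; protecting against. Under a non-IEEE mode such as
; "denormal-fp-math"="preserve-sign", an fsub whose exact result is a denormal
; may be flushed to zero, while fneg is defined as a pure sign-bit flip and is
; never flushed, so the two forms can return different bit patterns for
; denormal inputs. The function names and attribute group are hypothetical and
; chosen only for this example.

define float @fsub_form(float %x) {
  ; arithmetic subtraction: a denormal result may be flushed to -0.0
  %r = fsub float -0.000000e+00, %x
  ret float %r
}

define float @fneg_form(float %x) {
  ; sign-bit flip only: a denormal input stays a (negative) denormal
  %r = fneg float %x
  ret float %r
}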