Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -460,6 +460,11 @@ return true; } + /// Return true if denormals will be flushed to zero. + virtual bool willCanonicalize(SelectionDAG &DAG, SDNode *N) const { + return false; + } + /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { // Default behavior is to replace SQRT(X) with X*RSQRT(X). Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12708,18 +12708,26 @@ } // (fsub -0.0, N1) -> -N1 - // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the - // FSUB does not specify the sign bit of a NaN. Also note that for - // the same reason, the inverse transform is not safe, unless fast math - // flags are in play. if (N0CFP && N0CFP->isZero()) { if (N0CFP->isNegative() || (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { - if (SDValue NegN1 = - TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) - return NegN1; - if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + // We cannot replace an FSUB(+-0.0,X) with FNEG(X) when denormals are + // flushed to zero, unless all users treat denorms as zero (DAZ). + DenormalMode DenormMode = DAG.getDenormalMode(VT); + + // Check that all uses will flush denorms to zero. 
+ bool Flushed = true; + for (auto UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) + if (!TLI.willCanonicalize(DAG, *UI)) + Flushed = false; + + if (Flushed || (DenormMode == DenormalMode::getIEEE())) { + if (SDValue NegN1 = + TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize)) + return NegN1; + if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) + return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags); + } } } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -692,7 +692,7 @@ void visitAdd(const User &I) { visitBinary(I, ISD::ADD); } void visitFAdd(const User &I) { visitBinary(I, ISD::FADD); } void visitSub(const User &I) { visitBinary(I, ISD::SUB); } - void visitFSub(const User &I); + void visitFSub(const User &I) { visitBinary(I, ISD::FSUB); } void visitMul(const User &I) { visitBinary(I, ISD::MUL); } void visitFMul(const User &I) { visitBinary(I, ISD::FMUL); } void visitURem(const User &I) { visitBinary(I, ISD::UREM); } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3003,20 +3003,6 @@ DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot())); } -void SelectionDAGBuilder::visitFSub(const User &I) { - // -0.0 - X --> fneg - Type *Ty = I.getType(); - if (isa<Constant>(I.getOperand(0)) && - I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { - SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), - Op2.getValueType(), Op2)); - return; - } - - visitBinary(I, ISD::FSUB); -} - void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) { SDNodeFlags Flags; Index: 
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -176,6 +176,7 @@ NegatibleCost &Cost, unsigned Depth) const override; + bool willCanonicalize(SelectionDAG &DAG, SDNode *N) const override; bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, Index: llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -831,6 +831,52 @@ ForCodeSize, Cost, Depth); } +// Return true if the Opcode will treat denormals as zero (DAZ). +bool AMDGPUTargetLowering::willCanonicalize(SelectionDAG &DAG, SDNode *N) const { + // FIXME: This is not a complete list. This only represents current + // testing. + switch (N->getOpcode()) { + default: return false; + case ISD::FCANONICALIZE: + case ISD::FADD: + case ISD::FSUB: + case ISD::FMUL: + case ISD::FMA: + case ISD::FMAD: + case ISD::FMAXNUM: + case ISD::FP_EXTEND: + case ISD::FP_ROUND: + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: + case ISD::FTRUNC: + case ISD::FSQRT: + case AMDGPUISD::CLAMP: + case AMDGPUISD::FMAD_FTZ: + case AMDGPUISD::FMED3: + case AMDGPUISD::RCP: + return true; + case ISD::FNEG: + case ISD::EXTRACT_VECTOR_ELT: + case ISD::EXTRACT_SUBVECTOR: { + for (auto UI = N->use_begin(), E = N->use_end(); UI != E; ++UI) + if (!willCanonicalize(DAG, *UI)) + return false; + return true; + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntrinsicID + = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); + switch (IntrinsicID) { + case Intrinsic::amdgcn_fdiv_fast: + return true; + } + return false; + } + } + + llvm_unreachable("invalid operation"); +} + //===---------------------------------------------------------------------===// // Target Properties 
//===---------------------------------------------------------------------===// Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -9145,7 +9145,6 @@ switch (Opcode) { // These will flush denorms if required. case ISD::FADD: - case ISD::FSUB: case ISD::FMUL: case ISD::FCEIL: case ISD::FFLOOR: @@ -9175,7 +9174,25 @@ case AMDGPUISD::CVT_F32_UBYTE2: case AMDGPUISD::CVT_F32_UBYTE3: return true; + case ISD::FSUB: { + SDValue N0 = Op.getOperand(0); + ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true); + const TargetOptions &Options = DAG.getTarget().Options; + const SDNodeFlags Flags = Op->getFlags(); + + // FIXME: This works around a bug with FCANONICALIZE. Legalize + // will remove the FCANONICALIZE before the FSUB(-0,X)->FNEG(X) + // transform is considered. + // FSUB(+-0.0, X) will become FNEG(X) + if (N0CFP && N0CFP->isZero()) { + if (N0CFP->isNegative() || + (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) { + return false; + } + } + return true; + } // It can/will be lowered or combined as a bit operation. // Need to check their input recursively to handle. 
case ISD::FNEG: Index: llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll +++ llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll @@ -232,8 +232,8 @@ ; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp: ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} ; GCN-DENORM-DAG: v_rcp_f32_e32 ; GCN-DENORM-DAG: v_rcp_f32_e32 @@ -274,10 +274,10 @@ } ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp: -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}} ; GCN-DENORM-DAG: v_rcp_f32_e32 ; GCN-DENORM-DAG: v_rcp_f32_e32 @@ -298,8 +298,8 @@ ; GCN-DENORM-DAG: v_div_fmas_f32 ; GCN-DENORM-DAG: v_div_fmas_f32 -; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}} -; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}} +; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}} +; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}} ; GCN-FLUSH-DAG: v_rcp_f32_e32 ; GCN-FLUSH-DAG: v_rcp_f32_e64 Index: llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f16.ll @@ -192,6 +192,10 @@ ret void } +; FIXME: The MAD only folds the FSUB(-0,X) when the FNEG(X) transform +; happens in SelectionDAGBuilder. DAGCombiner probably needs to +; be updated to fold the FNEG after visitFSUB(...) runs. 
+ ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f16 ; GCN: {{buffer|flat|global}}_load_ushort [[R1:v[0-9]+]], ; GCN: {{buffer|flat|global}}_load_ushort [[R2:v[0-9]+]], @@ -216,7 +220,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half -2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -247,7 +251,7 @@ %r1 = load volatile half, half addrspace(1)* %gep.0 %r2 = load volatile half, half addrspace(1)* %gep.1 - %r1.fneg = fsub half -0.000000e+00, %r1 + %r1.fneg = fneg half %r1 %r3 = tail call half @llvm.fmuladd.f16(half 2.0, half %r1.fneg, half %r2) store half %r3, half addrspace(1)* %gep.out @@ -434,11 +438,11 @@ ; GFX10-DENORM-CONTRACT: v_fmac_f16_e32 [[REGC]], [[REGA]], [[REGB]] ; GCN-DENORM-STRICT: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-STRICT: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; VI-DENORM: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-FLUSH: v_mul_f16_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GFX10-FLUSH: v_add_f16_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; GFX10-FLUSH: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-DENORM-STRICT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] ; GFX10-DENORM-CONTRACT: global_store_short v{{\[[0-9]+:[0-9]+\]}}, [[REGC]] Index: llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll +++ llvm/test/CodeGen/AMDGPU/fmuladd.f32.ll @@ -227,6 +227,10 @@ ret void } +; FIXME: The MAD only folds the FSUB(-0,X) when the FNEG(X) transform +; happens in SelectionDAGBuilder. 
DAGCombiner probably needs to +; be updated to fold the FNEG after visitFSUB(...) runs. + ; XXX ; GCN-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32 ; GCN: {{buffer|flat|global}}_load_dword [[R1:v[0-9]+]], @@ -254,7 +258,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -287,7 +291,7 @@ %r1 = load volatile float, float addrspace(1)* %gep.0 %r2 = load volatile float, float addrspace(1)* %gep.1 - %r1.fneg = fsub float -0.000000e+00, %r1 + %r1.fneg = fneg float %r1 %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2) store float %r3, float addrspace(1)* %gep.out @@ -478,10 +482,10 @@ ; GCN-DENORM-FASTFMA-CONTRACT: v_fma_f32 [[RESULT:v[0-9]+]], [[REGA]], [[REGB]], [[REGC]] ; GCN-DENORM-SLOWFMA-CONTRACT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-SLOWFMA-CONTRACT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; GCN-DENORM-STRICT: v_mul_f32_e32 [[TMP:v[0-9]+]], [[REGA]], [[REGB]] -; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[REGC]], [[TMP]] +; GCN-DENORM-STRICT: v_add_f32_e32 [[RESULT:v[0-9]+]], [[TMP]], [[REGC]] ; SI-DENORM: buffer_store_dword [[RESULT]] ; VI-DENORM: {{global|flat}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]] Index: llvm/test/CodeGen/AMDGPU/fneg-combines.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/fneg-combines.ll +++ llvm/test/CodeGen/AMDGPU/fneg-combines.ll @@ -8,6 +8,10 @@ ; fadd tests ; -------------------------------------------------------------------------------- +; FIXME: I think we want to test FNEG(X) folding here. The FSUB(-0,X) case is +; uninteresting. 
Unless these tests should be split into +; GCN-FLUSH/GCN-DENORM checks. + ; GCN-LABEL: {{^}}v_fneg_add_f32: ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] @@ -26,7 +30,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store float %fneg, float addrspace(1)* %out.gep ret void } @@ -47,7 +51,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %add, float addrspace(1)* %out ret void @@ -75,7 +79,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %add = fadd float %a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %add, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -100,9 +104,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -124,9 +128,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %add = fadd float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float 
addrspace(1)* %out ret void } @@ -148,10 +152,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %add = fadd float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out ret void } @@ -177,9 +181,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -205,9 +209,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %add = fadd float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %add + %fneg = fneg float %add %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -226,7 +230,7 @@ %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -249,7 +253,7 @@ %tmp9 = fmul reassoc nnan arcp contract 
float 0.000000e+00, %tmp8 %.i188 = fadd float %tmp9, 0.000000e+00 %tmp10 = fcmp uge float %.i188, %tmp2 - %tmp11 = fsub float -0.000000e+00, %.i188 + %tmp11 = fneg float %.i188 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 %tmp12 = fcmp ule float %.i092, 0.000000e+00 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 @@ -274,7 +278,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -295,7 +299,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -318,7 +322,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %mul, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -338,9 +342,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -358,9 +362,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b 
= fneg float %b %mul = fmul float %a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -378,10 +382,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = fmul float %fneg.a, %fneg.b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -402,9 +406,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -425,9 +429,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = fmul float %fneg.a, %b - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -454,7 +458,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -466,7 
+470,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -482,7 +486,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min store float %min.fneg, float addrspace(1)* %out.gep ret void } @@ -493,7 +497,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float %a, float %a) - %min.fneg = fsub float -0.0, %min + %min.fneg = fneg float %min ret float %min.fneg } @@ -509,7 +513,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -520,7 +524,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -536,7 +540,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -547,7 +551,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret 
float %fneg } @@ -562,7 +566,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -579,7 +583,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -602,7 +606,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -624,7 +628,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min store float %fneg, float addrspace(1)* %out.gep ret void } @@ -730,7 +734,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #0 { %min = call float @llvm.minnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min ret float %fneg } @@ -750,7 +754,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -779,7 +783,7 @@ %a = 
load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -793,7 +797,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %mul = fmul float %fneg, %b ret float %mul } @@ -816,7 +820,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -831,7 +835,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #0 { %min = call float @llvm.minnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %min + %fneg = fneg float %min %use1 = fmul float %min, 4.0 %ins0 = insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -859,7 +863,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -871,7 +875,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -887,7 +891,7 @@ 
%out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max store float %max.fneg, float addrspace(1)* %out.gep ret void } @@ -898,7 +902,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_self_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float %a, float %a) - %max.fneg = fsub float -0.0, %max + %max.fneg = fneg float %max ret float %max.fneg } @@ -914,7 +918,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -925,7 +929,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float 4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -941,7 +945,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -952,7 +956,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -4.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -967,7 +971,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + 
%fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -984,7 +988,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max store float %fneg, float addrspace(1)* %out.gep ret void } @@ -995,7 +999,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #0 { %max = call float @llvm.maxnum.f32(float -0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max ret float %fneg } @@ -1015,7 +1019,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b store float %mul, float addrspace(1)* %out.gep ret void @@ -1029,7 +1033,7 @@ ; GCN-NEXT: ; return define amdgpu_ps float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float 0.0, float %a) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %mul = fmul float %fneg, %b ret float %mul } @@ -1052,7 +1056,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1067,7 +1071,7 @@ ; GCN-NEXT: ; return define amdgpu_ps <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #0 { %max = call float @llvm.maxnum.f32(float %a, float %b) - %fneg = fsub float -0.000000e+00, %max + %fneg = fneg float %max %use1 = fmul float %max, 4.0 %ins0 = 
insertelement <2 x float> undef, float %fneg, i32 0 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 @@ -1099,7 +1103,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1123,7 +1127,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out store volatile float %fma, float addrspace(1)* %out ret void @@ -1154,7 +1158,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fma.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1182,9 +1186,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1210,9 +1214,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + 
%fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1238,10 +1242,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1267,10 +1271,10 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.c = fsub float -0.000000e+00, %c + %fneg.a = fneg float %a + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1296,9 +1300,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.c = fsub float -0.000000e+00, %c + %fneg.c = fneg float %c %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1330,9 +1334,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store volatile float %fneg, float 
addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1360,9 +1364,9 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fneg.a, %d store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1394,7 +1398,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1446,7 +1450,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) - %fneg = fsub float -0.000000e+00, %fma + %fneg = fneg float %fma %use1 = fmul float %fma, 4.0 store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1483,7 +1487,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store double %fneg, double addrspace(1)* %out.gep @@ -1502,7 +1506,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a 
= fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpext = fpext float %fneg.a to double %fneg = fsub double -0.000000e+00, %fpext store volatile double %fneg, double addrspace(1)* %out.gep @@ -1559,7 +1563,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpext, float addrspace(1)* %out.gep ret void @@ -1573,7 +1577,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile half, half addrspace(1)* %a.gep %fpext = fpext half %a to float - %fneg = fsub float -0.000000e+00, %fpext + %fneg = fneg float %fpext %mul = fmul float %fpext, 4.0 store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %mul, float addrspace(1)* %out.gep @@ -1595,7 +1599,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1612,7 +1616,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1631,7 +1635,7 @@ %a = load volatile double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %fneg.a, double addrspace(1)* undef ret void @@ -1652,7 +1656,7 @@ %a = load volatile 
double, double addrspace(1)* %a.gep %fneg.a = fsub double -0.000000e+00, %a %fpround = fptrunc double %fneg.a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround %use1 = fmul double %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile double %use1, double addrspace(1)* undef @@ -1685,7 +1689,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store half %fneg, half addrspace(1)* %out.gep @@ -1705,7 +1709,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile double, double addrspace(1)* %a.gep %fpround = fptrunc double %a to float - %fneg = fsub float -0.000000e+00, %fpround + %fneg = fneg float %fpround store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fpround, float addrspace(1)* %out.gep ret void @@ -1723,7 +1727,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround store volatile half %fneg, half addrspace(1)* %out.gep @@ -1743,7 +1747,7 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %fpround = fptrunc float %fneg.a to half %fneg = fsub half -0.000000e+00, %fpround %use1 = fmul 
float %fneg.a, %c @@ -1767,7 +1771,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rcp = call float @llvm.amdgcn.rcp.f32(float %a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1782,9 +1786,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1801,9 +1805,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %fneg.a, float addrspace(1)* undef ret void @@ -1821,9 +1825,9 @@ %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) - %fneg = fsub float -0.000000e+00, %rcp + %fneg = fneg float %rcp %use1 = fmul float %fneg.a, %c store volatile float %fneg, float addrspace(1)* %out.gep store volatile float %use1, float addrspace(1)* undef @@ -1848,7 +1852,7 @@ %a = load volatile float, float 
addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store float %fneg, float addrspace(1)* %out.gep ret void } @@ -1869,7 +1873,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %mul, float addrspace(1)* %out ret void @@ -1891,7 +1895,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -1911,9 +1915,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1931,9 +1935,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.b = fsub float -0.000000e+00, %b + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* 
%out ret void } @@ -1951,10 +1955,10 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a - %fneg.b = fsub float -0.000000e+00, %b + %fneg.a = fneg float %a + %fneg.b = fneg float %b %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out ret void } @@ -1974,9 +1978,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul store volatile float %fneg, float addrspace(1)* %out store volatile float %fneg.a, float addrspace(1)* %out ret void @@ -1997,9 +2001,9 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep - %fneg.a = fsub float -0.000000e+00, %a + %fneg.a = fneg float %a %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b) - %fneg = fsub float -0.000000e+00, %mul + %fneg = fneg float %mul %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c) store volatile float %fneg, float addrspace(1)* %out store volatile float %use1, float addrspace(1)* %out @@ -2023,7 +2027,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.sin.f32(float %a) - %fneg = fsub float -0.000000e+00, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } 
@@ -2039,7 +2043,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %sin = call float @llvm.amdgcn.sin.f32(float %a) - %fneg = fsub float -0.0, %sin + %fneg = fneg float %sin store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2059,7 +2063,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.trunc.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2086,7 +2090,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %round = call float @llvm.round.f32(float %a) - %fneg = fsub float -0.0, %round + %fneg = fneg float %round store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2106,7 +2110,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %rint = call float @llvm.rint.f32(float %a) - %fneg = fsub float -0.0, %rint + %fneg = fneg float %rint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2126,7 +2130,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %nearbyint = call float @llvm.nearbyint.f32(float %a) - %fneg = fsub float -0.0, %nearbyint + %fneg = fneg float %nearbyint store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2146,7 +2150,7 @@ %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext %a = load volatile float, float addrspace(1)* %a.gep %trunc = call float @llvm.canonicalize.f32(float %a) - %fneg = fsub float -0.0, %trunc + %fneg = fneg float %trunc store float %fneg, float addrspace(1)* %out.gep ret void } @@ -2170,7 +2174,7 @@ %a = load volatile float, 
float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2193,7 +2197,7 @@ %a = load volatile float, float addrspace(1)* %a.gep %b = load volatile float, float addrspace(1)* %b.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0) %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0) store volatile float %intrp0, float addrspace(1)* %out.gep @@ -2230,7 +2234,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul %cmp0 = icmp eq i32 %d, 0 br i1 %cmp0, label %if, label %endif @@ -2266,7 +2270,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %fneg, float addrspace(1)* %out.gep ret void @@ -2295,7 +2299,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep %mul = fmul float %a, %b - %fneg = fsub float -0.0, %mul + %fneg = fneg float %mul call void asm sideeffect "; use $0", "v"(float %fneg) #0 store volatile float %mul, float addrspace(1)* %out.gep ret void @@ -2328,7 +2332,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float 
%fneg.a, float %b, float %c) %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0) @@ -2360,7 +2364,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %mul0 = fmul float %fneg.a, %b %mul1 = fmul float %fneg.a, %c @@ -2391,7 +2395,7 @@ %b = load volatile float, float addrspace(1)* %b.gep %c = load volatile float, float addrspace(1)* %c.gep - %fneg.a = fsub float -0.0, %a + %fneg.a = fneg float %a %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0) %mul1 = fmul float %fneg.a, %c @@ -2433,7 +2437,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0) - %fneg.fma0 = fsub float -0.0, %fma0 + %fneg.fma0 = fneg float %fma0 %mul1 = fmul float %fneg.fma0, %c %mul2 = fmul float %fneg.fma0, %d @@ -2501,7 +2505,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) store volatile float %fma0, float addrspace(1)* %out ret void @@ -2531,7 +2535,7 @@ %d = load volatile float, float addrspace(1)* %d.gep %trunc.a = call float @llvm.trunc.f32(float %a) - %trunc.fneg.a = fsub float -0.0, %trunc.a + %trunc.fneg.a = fneg float %trunc.a %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) %mul1 = fmul float %trunc.a, %d store volatile float %fma0, float addrspace(1)* %out Index: llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll +++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmed3.ll @@ -32,7 +32,7 @@ ; GCN-LABEL: {{^}}test_fneg_fmed3_multi_use: ; GCN: v_med3_f32 [[MED3:v[0-9]+]], -s{{[0-9]+}}, -v{{[0-9]+}}, -v{{[0-9]+}} -; GCN: v_mul_f32_e32 
v{{[0-9]+}}, -4.0, [[MED3]] +; GCN: v_mul_f32_e64 v{{[0-9]+}}, -[[MED3]], 4.0 define amdgpu_kernel void @test_fneg_fmed3_multi_use(float addrspace(1)* %out, float %src0, float %src1, float %src2) #1 { %med3 = call float @llvm.amdgcn.fmed3.f32(float %src0, float %src1, float %src2) %neg.med3 = fsub float -0.0, %med3 Index: llvm/test/CodeGen/AMDGPU/selectcc-opt.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/selectcc-opt.ll +++ llvm/test/CodeGen/AMDGPU/selectcc-opt.ll @@ -2,6 +2,8 @@ ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s +; FIXME: Not sure what to do about these tests. The FSUB(-0.0,X) is being +; folded into the select, before there's a chance to convert to FNEG(X). ; FUNC-LABEL: {{^}}test_a: ; EG-NOT: CND @@ -11,7 +13,7 @@ entry: %0 = fcmp olt float %in, 0.000000e+00 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 @@ -39,7 +41,7 @@ entry: %0 = fcmp olt float %in, 0.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 %4 = bitcast i32 %3 to float %5 = bitcast float %4 to i32 Index: llvm/test/CodeGen/AMDGPU/set-dx10.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/set-dx10.ll +++ llvm/test/CodeGen/AMDGPU/set-dx10.ll @@ -4,6 +4,9 @@ ; to store integer true (-1) and false (0) values are lowered to one of the ; SET*DX10 instructions. +; FIXME: Not sure what to do about these tests. The FSUB(-0.0,X) is being +; folded into the select, before there's a chance to convert to FNEG(X). 
+ ; CHECK: {{^}}fcmp_une_select_fptosi: ; CHECK: LSHR ; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, @@ -12,7 +15,7 @@ entry: %0 = fcmp une float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -38,7 +41,7 @@ entry: %0 = fcmp oeq float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -64,7 +67,7 @@ entry: %0 = fcmp ogt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -90,7 +93,7 @@ entry: %0 = fcmp oge float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -116,7 +119,7 @@ entry: %0 = fcmp ole float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void @@ -142,7 +145,7 @@ entry: %0 = fcmp olt float %in, 5.0 %1 = select i1 %0, float 1.000000e+00, float 0.000000e+00 - %2 = fsub float -0.000000e+00, %1 + %2 = fneg float %1 %3 = fptosi float %2 to i32 store i32 %3, i32 addrspace(1)* %out ret void