Index: lib/Target/R600/SIISelLowering.cpp
===================================================================
--- lib/Target/R600/SIISelLowering.cpp
+++ lib/Target/R600/SIISelLowering.cpp
@@ -1002,8 +1002,45 @@
   SDValue Zero = DAG.getConstant(0, MVT::i32);
   SDValue One = DAG.getConstant(1, MVT::i32);
 
-  SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
-  SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
+  SDValue LHS = Op.getOperand(1);
+  SDValue RHS = Op.getOperand(2);
+
+  // Undo the combine done in visitSINT_TO_FP / visitUINT_TO_FP:
+  // f64 (select (i1 cnd), [+|-]1.0, 0.0) -> f64 [u|s]int_to_fp (i1 cnd)
+  //
+  // It is larger and more expensive to do the two selects and materialize the
+  // odd constant than to select an i32 -1 / 0 and convert the result to f64.
+  //
+  // = 16 byte, 12 cycle
+  // v_cndmask_b32_e32 v0, 0, -1, s[0:1]
+  // v_cvt_f64_i32_e32 v[0:1], v0
+  //
+  // vs.
+  //
+  // = 20 byte, 16 cycle
+  // v_mov_b32_e32 v0, 0xbff00000
+  // v_cndmask_b32_e64 v1, 0, v0, s[0:1]
+  // v_mov_b32 v0, 0
+  //
+
+  if (const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS)) {
+    if (CRHS->isNullValue()) {
+      if (const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS)) {
+        if (CLHS->getZExtValue() == DoubleToBits(-1.0)) {
+          SDValue Cvt = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f64, Cond);
+          return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Cvt);
+        }
+
+        if (CLHS->getZExtValue() == DoubleToBits(1.0)) {
+          SDValue Cvt = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f64, Cond);
+          return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Cvt);
+        }
+      }
+    }
+  }
+
+  LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, LHS);
+  RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, RHS);
 
   SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
   SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
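
For reference, a standalone sketch of the bit patterns the new combine keys on (not part of the diff; the local DoubleToBits below is a stand-in with the same semantics as llvm::DoubleToBits from llvm/Support/MathExtras.h, i.e. it reinterprets the IEEE-754 representation of a double as a uint64_t). This is also why the i64 variants of the new tests select the literals u0xbff0000000000000 and u0x3ff0000000000000:

  // Build with any C++11 compiler; all three assertions hold.
  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static uint64_t DoubleToBits(double D) {
    uint64_t Bits;
    static_assert(sizeof(Bits) == sizeof(D), "double must be 64 bits");
    std::memcpy(&Bits, &D, sizeof(D)); // Type-pun via memcpy, not a cast.
    return Bits;
  }

  int main() {
    assert(DoubleToBits(-1.0) == UINT64_C(0xbff0000000000000)); // SINT_TO_FP case
    assert(DoubleToBits(1.0) == UINT64_C(0x3ff0000000000000));  // UINT_TO_FP case
    assert(DoubleToBits(0.0) == 0); // the isNullValue() operand
    return 0;
  }
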
Index: test/CodeGen/R600/fceil64.ll
===================================================================
--- test/CodeGen/R600/fceil64.ll
+++ test/CodeGen/R600/fceil64.ll
@@ -25,8 +25,8 @@
 ; SI: v_cmp_lg_f64
 ; SI: v_cmp_gt_f64
 ; SI: s_and_b64
-; SI: v_cndmask_b32
-; SI: v_cndmask_b32
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, 1,
+; SI-NEXT: v_cvt_f64_u32_e32
 ; SI: v_add_f64
 ; SI: s_endpgm
 define void @fceil_f64(double addrspace(1)* %out, double %x) {
Index: test/CodeGen/R600/ffloor.ll
===================================================================
--- test/CodeGen/R600/ffloor.ll
+++ test/CodeGen/R600/ffloor.ll
@@ -26,8 +26,8 @@
 ; SI: v_cmp_lg_f64
 ; SI: v_cmp_lt_f64
 ; SI: s_and_b64
-; SI: v_cndmask_b32
-; SI: v_cndmask_b32
+; SI: v_cndmask_b32_e64 {{v[0-9]+}}, 0, -1,
+; SI-NEXT: v_cvt_f64_i32_e32
 ; SI: v_add_f64
 ; SI: s_endpgm
 define void @ffloor_f64(double addrspace(1)* %out, double %x) {
Index: test/CodeGen/R600/sint_to_fp.f64.ll
===================================================================
--- test/CodeGen/R600/sint_to_fp.f64.ll
+++ test/CodeGen/R600/sint_to_fp.f64.ll
@@ -12,11 +12,9 @@
 
 ; SI-LABEL: {{^}}sint_to_fp_i1_f64:
 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
-; uses an SGPR for [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: buffer_store_dwordx2
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, [[CMP]]
+; SI: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @sint_to_fp_i1_f64(double addrspace(1)* %out, i32 %in) {
   %cmp = icmp eq i32 %in, 0
@@ -58,3 +56,42 @@
   store double %result, double addrspace(1)* %out
   ret void
 }
+
+; SI-LABEL: {{^}}select_sint_to_fp_i1_vals_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, [[CMP]]
+; SI: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double -1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}select_sint_to_fp_i1_vals_i64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, -1, [[CMP]]
+; SI: v_cvt_f64_i32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @select_sint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0xbff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; TODO: This should invert the compare and swap the select operands, then apply the same combine.
+; SI-LABEL: {{^}}swap_select_sint_to_fp_i1_vals_f64:
+; SI-NOT: v_cvt_f64_i32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
+define void @swap_select_sint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double -1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
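
A sketch of what that TODO is after (hypothetical IR, not part of the patch): inverting the compare and swapping the select operands yields an equivalent function that already matches the new combine and would lower to v_cndmask_b32_e64 plus v_cvt_f64_i32_e32:

  ; Hand-rewritten equivalent of swap_select_sint_to_fp_i1_vals_f64 above:
  ; (%in == 0 ? 0.0 : -1.0) is the same as (%in != 0 ? -1.0 : 0.0).
  define void @swap_select_sint_inverted_by_hand(double addrspace(1)* %out, i32 %in) {
    %cmp = icmp ne i32 %in, 0
    %select = select i1 %cmp, double -1.0, double 0.0
    store double %select, double addrspace(1)* %out, align 8
    ret void
  }
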
Index: test/CodeGen/R600/uint_to_fp.f64.ll
===================================================================
--- test/CodeGen/R600/uint_to_fp.f64.ll
+++ test/CodeGen/R600/uint_to_fp.f64.ll
@@ -72,11 +72,9 @@
 
 ; SI-LABEL: {{^}}uint_to_fp_i1_to_f64:
 ; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
-; We can't fold the SGPRs into v_cndmask_b32_e64, because it already
-; uses an SGPR for [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: v_cndmask_b32_e64 v{{[0-9]+}}, 0, v{{[0-9]+}}, [[CMP]]
-; SI: buffer_store_dwordx2
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, [[CMP]]
+; SI: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
 ; SI: s_endpgm
 define void @uint_to_fp_i1_to_f64(double addrspace(1)* %out, i32 %in) {
   %cmp = icmp eq i32 %in, 0
@@ -95,3 +93,42 @@
   store double %fp, double addrspace(1)* %out, align 8
   ret void
 }
+
+; SI-LABEL: {{^}}select_uint_to_fp_i1_vals_f64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, [[CMP]]
+; SI: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 1.0, double 0.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
+
+; SI-LABEL: {{^}}select_uint_to_fp_i1_vals_i64:
+; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
+; SI: v_cndmask_b32_e64 [[TMP:v[0-9]+]], 0, 1, [[CMP]]
+; SI: v_cvt_f64_u32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[TMP]]
+; SI: buffer_store_dwordx2 [[RESULT]]
+; SI: s_endpgm
+define void @select_uint_to_fp_i1_vals_i64(i64 addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, i64 u0x3ff0000000000000, i64 0
+  store i64 %select, i64 addrspace(1)* %out, align 8
+  ret void
+}
+
+; TODO: This should invert the compare and swap the select operands, then apply the same combine.
+; SI-LABEL: {{^}}swap_select_uint_to_fp_i1_vals_f64:
+; SI-NOT: v_cvt_f64_u32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e64
+; SI: s_endpgm
+define void @swap_select_uint_to_fp_i1_vals_f64(double addrspace(1)* %out, i32 %in) {
+  %cmp = icmp eq i32 %in, 0
+  %select = select i1 %cmp, double 0.0, double 1.0
+  store double %select, double addrspace(1)* %out, align 8
+  ret void
+}
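
The unsigned case reads the same way; as another hypothetical hand-rewrite (not something the patch performs), the inverted form becomes the 1.0 / 0.0 select the combine recognizes and would lower to v_cndmask_b32_e64 plus v_cvt_f64_u32_e32:

  ; Hand-rewritten equivalent of swap_select_uint_to_fp_i1_vals_f64 above:
  ; (%in == 0 ? 0.0 : 1.0) is the same as (%in != 0 ? 1.0 : 0.0).
  define void @swap_select_uint_inverted_by_hand(double addrspace(1)* %out, i32 %in) {
    %cmp = icmp ne i32 %in, 0
    %select = select i1 %cmp, double 1.0, double 0.0
    store double %select, double addrspace(1)* %out, align 8
    ret void
  }

Automating that inversion would presumably sit next to the new check in LowerSELECT, but it also has to invert the i1 condition itself, which is likely why both test files leave it as a TODO.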