diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9646,6 +9646,39 @@
     return SimplifySelect(DL, N0, N1, N2);
   }
 
+  // Hoist a binop that both select arms share out of the select. The select
+  // must be the only user of the condition and of BOTH binop nodes; otherwise
+  // an arm's binop stays live for its other users and the fold adds a node
+  // instead of removing one. Nodes with more than one result are skipped
+  // because the replacement below is built with the single result type VT.
+  if (N1.getOpcode() == N2.getOpcode() && TLI.isBinOp(N1.getOpcode()) &&
+      N1->getNumValues() == 1 && N->isOnlyUserOf(N0.getNode()) &&
+      N->isOnlyUserOf(N1.getNode()) && N->isOnlyUserOf(N2.getNode())) {
+    // Fold select(cond, binop(x, y), binop(z, y))
+    //  --> binop(select(cond, x, z), y)
+    if (N1.getOperand(1) == N2.getOperand(1)) {
+      SDValue NewSel =
+          DAG.getSelect(DL, VT, N0, N1.getOperand(0), N2.getOperand(0));
+      return DAG.getNode(N1.getOpcode(), DL, VT, NewSel, N1.getOperand(1));
+    }
+
+    // Fold select(cond, binop(x, y), binop(x, z))
+    //  --> binop(x, select(cond, y, z))
+    // Second op VT might be different (e.g. shift amount type), so only fold
+    // when both second operands already have the select's type VT.
+    if (N1.getOperand(0) == N2.getOperand(0) &&
+        VT == N1.getOperand(1).getValueType() &&
+        VT == N2.getOperand(1).getValueType()) {
+      SDValue NewSel =
+          DAG.getSelect(DL, VT, N0, N1.getOperand(1), N2.getOperand(1));
+      return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0), NewSel);
+    }
+
+    // TODO: Handle isCommutativeBinOp as well ?
+    // TODO(review): the new binop is built without the node flags of N1/N2;
+    // confirm whether they should be propagated.
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/NVPTX/fast-math.ll b/llvm/test/CodeGen/NVPTX/fast-math.ll
--- a/llvm/test/CodeGen/NVPTX/fast-math.ll
+++ b/llvm/test/CodeGen/NVPTX/fast-math.ll
@@ -146,10 +146,8 @@
 
 ; CHECK-LABEL: repeated_div_recip_allowed
 define float @repeated_div_recip_allowed(i1 %pred, float %a, float %b, float %divisor) {
-; CHECK: rcp.rn.f32
-; CHECK: mul.rn.f32
-; CHECK: mul.rn.f32
 ; CHECK: selp.f32
+; CHECK: div.rn.f32
   %x = fdiv arcp float %a, %divisor
   %y = fdiv arcp float %b, %divisor
   %z = select i1 %pred, float %x, float %y
@@ -158,10 +156,8 @@
 
 ; CHECK-LABEL: repeated_div_recip_allowed_ftz
 define float @repeated_div_recip_allowed_ftz(i1 %pred, float %a, float %b, float %divisor) #1 {
-; CHECK: rcp.rn.ftz.f32
-; CHECK: mul.rn.ftz.f32
-; CHECK: mul.rn.ftz.f32
 ; CHECK: selp.f32
+; CHECK: div.rn.ftz.f32
   %x = fdiv arcp float %a, %divisor
   %y = fdiv arcp float %b, %divisor
   %z = select i1 %pred, float %x, float %y
@@ -170,10 +166,8 @@
 
 ; CHECK-LABEL: repeated_div_fast
 define float @repeated_div_fast(i1 %pred, float %a, float %b, float %divisor) #0 {
-; CHECK: rcp.approx.f32
-; CHECK: mul.f32
-; CHECK: mul.f32
 ; CHECK: selp.f32
+; CHECK: div.approx.f32
   %x = fdiv float %a, %divisor
   %y = fdiv float %b, %divisor
   %z = select i1 %pred, float %x, float %y
@@ -182,10 +176,8 @@
 
 ; CHECK-LABEL: repeated_div_fast_ftz
 define float @repeated_div_fast_ftz(i1 %pred, float %a, float %b, float %divisor) #0 #1 {
-; CHECK: rcp.approx.ftz.f32
-; CHECK: mul.ftz.f32
-; CHECK: mul.ftz.f32
 ; CHECK: selp.f32
+; CHECK: div.approx.ftz.f32
   %x = fdiv float %a, %divisor
   %y = fdiv float %b, %divisor
   %z = select i1 %pred, float %x, float %y
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -8,37 +8,40 @@
 %0 = type { i64, i32 }
 
 define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
-; CHECK-LABEL: test1:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    addq $8, %rdi
-; CHECK-NEXT:    addq $8, %rsi
-; CHECK-NEXT:    testb $1, %dl
-; CHECK-NEXT:    cmovneq %rdi, %rsi
-; CHECK-NEXT:    movl (%rsi), %eax
-; CHECK-NEXT:    retq
+; GENERIC-LABEL: test1:
+; GENERIC:       ## %bb.0:
+; GENERIC-NEXT:    testb $1, %dl
+; GENERIC-NEXT:    cmoveq %rsi, %rdi
+; GENERIC-NEXT:    movl 8(%rdi), %eax
+; GENERIC-NEXT:    retq
+;
+; ATOM-LABEL: test1:
+; ATOM:       ## %bb.0:
+; ATOM-NEXT:    testb $1, %dl
+; ATOM-NEXT:    cmoveq %rsi, %rdi
+; ATOM-NEXT:    movl 8(%rdi), %eax
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    nop
+; ATOM-NEXT:    retq
 ;
 ; ATHLON-LABEL: test1:
 ; ATHLON:       ## %bb.0:
-; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; ATHLON-NEXT:    addl $8, %ecx
-; ATHLON-NEXT:    addl $8, %eax
 ; ATHLON-NEXT:    testb $1, {{[0-9]+}}(%esp)
-; ATHLON-NEXT:    cmovnel %ecx, %eax
-; ATHLON-NEXT:    movl (%eax), %eax
+; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; ATHLON-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; ATHLON-NEXT:    cmovnel %eax, %ecx
+; ATHLON-NEXT:    movl (%ecx), %eax
+; ATHLON-NEXT:    movl 8(%eax), %eax
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: test1:
 ; MCU:       # %bb.0:
 ; MCU-NEXT:    testb $1, %cl
-; MCU-NEXT:    jne .LBB0_1
-; MCU-NEXT:  # %bb.2:
-; MCU-NEXT:    addl $8, %edx
-; MCU-NEXT:    movl (%edx), %eax
-; MCU-NEXT:    retl
-; MCU-NEXT:  .LBB0_1:
-; MCU-NEXT:    addl $8, %eax
-; MCU-NEXT:    movl (%eax), %eax
+; MCU-NEXT:    jne .LBB0_2
+; MCU-NEXT:  # %bb.1:
+; MCU-NEXT:    movl %edx, %eax
+; MCU-NEXT:  .LBB0_2:
+; MCU-NEXT:    movl 8(%eax), %eax
 ; MCU-NEXT:    retl
   %t0 = load %0, %0* %p
   %t1 = load %0, %0* %q