Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1901,8 +1901,19 @@
     return SDValue();
 
   // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and" and "or" with either 0 or -1 in which case we can
+  // propagate non-constant operands into select. CT/CF may be constant build
+  // vectors rather than ConstantSDNodes, so dyn_cast and require scalars.
+  auto *CTN = dyn_cast<ConstantSDNode>(CT);
+  auto *CFN = dyn_cast<ConstantSDNode>(CF);
+  bool CanFoldNonConst =
+      (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+      CTN && (CTN->isNullValue() || CTN->isAllOnesValue()) &&
+      CFN && (CFN->isNullValue() || CFN->isAllOnesValue());
+
   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
-  if (!isConstantOrConstantVector(CBO, true) &&
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
       !isConstantFPBuildVectorOrConstantFP(CBO))
     return SDValue();
 
@@ -1923,14 +1934,14 @@
   SDLoc DL(Sel);
   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
-  if (!NewCT.isUndef() &&
+  if (!CanFoldNonConst && !NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();
 
   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
-  if (!NewCF.isUndef() &&
+  if (!CanFoldNonConst && !NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
Index: test/CodeGen/AMDGPU/dagcombine-select.ll
===================================================================
--- test/CodeGen/AMDGPU/dagcombine-select.ll
+++ test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,5 +1,53 @@
 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}select_and1:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword [[SEL]],
+define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = and i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and2:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword [[SEL]],
+define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = and i32 %s, %y
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or1:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword [[SEL]],
+define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = or i32 %y, %s
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or2:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword [[SEL]],
+define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11
+  %s = select i1 %c, i32 0, i32 -1
+  %a = or i32 %s, %y
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
 ; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
 ; v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
 define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
Index: test/CodeGen/AMDGPU/udivrem.ll
===================================================================
--- test/CodeGen/AMDGPU/udivrem.ll
+++ test/CodeGen/AMDGPU/udivrem.ll
@@ -31,25 +31,25 @@
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]]
+; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]]
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
-; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]]
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]],
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
   %result0 = udiv i32 %x, %y
@@ -124,8 +124,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -147,8 +145,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -157,6 +153,7 @@
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
   %result0 = udiv <2 x i32> %x, %y
@@ -274,8 +271,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -297,8 +292,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -320,8 +313,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -339,6 +330,7 @@
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
   %result0 = udiv <4 x i32> %x, %y
Index: test/CodeGen/X86/dagcombine-select.ll
===================================================================
--- test/CodeGen/X86/dagcombine-select.ll
+++ test/CodeGen/X86/dagcombine-select.ll
@@ -6,9 +6,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -21,9 +19,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -34,11 +30,9 @@
 define i32 @select_or1(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -49,11 +43,9 @@
 define i32 @select_or2(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1