Index: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1901,8 +1901,19 @@
     return SDValue();
 
   // Bail out if any constants are opaque because we can't constant fold those.
+  // The exception is "and"/"or" where both select arms are 0 or -1 (scalar or
+  // splat); a non-constant operand can then be folded into the select, e.g.:
+  //   and (select Cond, 0, -1), X --> select Cond, 0, X
+  //   or X, (select Cond, -1, 0) --> select Cond, -1, X
+  bool CanFoldNonConst = (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
+                         (isNullConstantOrNullSplatConstant(CT) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CT)) &&
+                         (isNullConstantOrNullSplatConstant(CF) ||
+                          isAllOnesConstantOrAllOnesSplatConstant(CF));
+
   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
-  if (!isConstantOrConstantVector(CBO, true) &&
+  if (!CanFoldNonConst &&
+      !isConstantOrConstantVector(CBO, true) &&
       !isConstantFPBuildVectorOrConstantFP(CBO))
     return SDValue();
 
@@ -1923,14 +1934,14 @@
   SDLoc DL(Sel);
   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
-  if (!NewCT.isUndef() &&
+  if (!CanFoldNonConst && !NewCT.isUndef() &&
       !isConstantOrConstantVector(NewCT, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCT))
     return SDValue();
 
   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
-  if (!NewCF.isUndef() &&
+  if (!CanFoldNonConst && !NewCF.isUndef() &&
       !isConstantOrConstantVector(NewCF, true) &&
       !isConstantFPBuildVectorOrConstantFP(NewCF))
     return SDValue();
Index: llvm/trunk/test/CodeGen/AMDGPU/dagcombine-select.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dagcombine-select.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/dagcombine-select.ll
@@ -1,5 +1,107 @@
 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
 
+; GCN-LABEL: {{^}}select_and1:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 0, i32 -1 ; 0 when %c is true, otherwise all ones
+  %a = and i32 %y, %s ; the select feeds operand 1 of the 'and'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and2:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 0, i32 -1 ; 0 when %c is true, otherwise all ones
+  %a = and i32 %s, %y ; the select feeds operand 0 of the 'and'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and3:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_and3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 -1, i32 0 ; arms swapped: all ones when %c is true, otherwise 0
+  %a = and i32 %y, %s ; the select feeds operand 1 of the 'and'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_and_v4:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}},
+; GCN-NOT: v_and_b32
+; GCN:     store_dword
+define amdgpu_kernel void @select_and_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
+  %c = icmp slt i32 %x, 11 ; scalar i1 condition shared by all four lanes
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; zero splat when %c is true, otherwise all-ones splat
+  %a = and <4 x i32> %s, %y ; the select feeds operand 0 of the vector 'and'
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or1:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 0, i32 -1 ; 0 when %c is true, otherwise all ones
+  %a = or i32 %y, %s ; the select feeds operand 1 of the 'or'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or2:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 0, i32 -1 ; 0 when %c is true, otherwise all ones
+  %a = or i32 %s, %y ; the select feeds operand 0 of the 'or'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or3:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword v[{{[0-9:]+}}], [[SEL]],
+define amdgpu_kernel void @select_or3(i32 addrspace(1)* %p, i32 %x, i32 %y) {
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 -1, i32 0 ; arms swapped: all ones when %c is true, otherwise 0
+  %a = or i32 %y, %s ; the select feeds operand 1 of the 'or'
+  store i32 %a, i32 addrspace(1)* %p, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}select_or_v4:
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN:     v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}},
+; GCN-NOT: v_or_b32
+; GCN:     store_dword
+define amdgpu_kernel void @select_or_v4(<4 x i32> addrspace(1)* %p, i32 %x, <4 x i32> %y) {
+  %c = icmp slt i32 %x, 11 ; scalar i1 condition shared by all four lanes
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> ; zero splat when %c is true, otherwise all-ones splat
+  %a = or <4 x i32> %s, %y ; the select feeds operand 0 of the vector 'or'
+  store <4 x i32> %a, <4 x i32> addrspace(1)* %p, align 32
+  ret void
+}
+
 ; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants:
 ; GCN: v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9,
 define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) {
Index: llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/udivrem.ll
@@ -31,25 +31,25 @@
 ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]]
+; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]]
+; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]]
+; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]]
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]]
-; SI: v_cndmask_b32_e64
-; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]]
-; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]]
+; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]]
+; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]],
+; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]]
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]]
 ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]]
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]],
 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]],
-; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]]
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]],
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
   %result0 = udiv i32 %x, %y
@@ -124,8 +124,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -147,8 +145,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -157,6 +153,7 @@
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) {
   %result0 = udiv <2 x i32> %x, %y
@@ -274,8 +271,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -297,8 +292,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -320,8 +313,6 @@
 ; SI-DAG: v_mul_lo_i32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_cndmask_b32_e64
-; SI-DAG: v_and_b32_e32
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
@@ -339,6 +330,7 @@
 ; SI-DAG: v_add_{{[iu]}}32_e32
 ; SI-DAG: v_subrev_{{[iu]}}32_e32
 ; SI-DAG: v_cndmask_b32_e64
+; SI-NOT: v_and_b32
 ; SI: s_endpgm
 define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) {
   %result0 = udiv <4 x i32> %x, %y
Index: llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
+++ llvm/trunk/test/CodeGen/X86/dagcombine-select.ll
@@ -6,9 +6,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -21,9 +19,7 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    andl %esi, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -31,14 +27,42 @@
   ret i32 %a
 }
 
+define i32 @select_and3(i32 %x, i32 %y) {
+; CHECK-LABEL: select_and3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorl %eax, %eax
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    cmovll %esi, %eax
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 -1, i32 0 ; arms swapped: all ones when %c is true, otherwise 0
+  %a = and i32 %y, %s ; the select feeds operand 1 of the 'and'
+  ret i32 %a
+}
+
+define <4 x i32> @select_and_v4(i32 %x, <4 x i32> %y) {
+; CHECK-LABEL: select_and_v4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    jl .LBB3_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    movaps %xmm0, %xmm1
+; CHECK-NEXT:  .LBB3_2:
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11 ; scalar i1 condition shared by all four lanes
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1> ; zero splat when %c is true, otherwise all-ones splat
+  %a = and <4 x i32> %s, %y ; the select feeds operand 0 of the vector 'and'
+  ret <4 x i32> %a
+}
+
 define i32 @select_or1(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -49,11 +73,9 @@
 define i32 @select_or2(i32 %x, i32 %y) {
 ; CHECK-LABEL: select_or2:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    xorl %eax, %eax
 ; CHECK-NEXT:    cmpl $11, %edi
-; CHECK-NEXT:    setl %al
-; CHECK-NEXT:    decl %eax
-; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovll %esi, %eax
 ; CHECK-NEXT:    retq
   %c = icmp slt i32 %x, 11
   %s = select i1 %c, i32 0, i32 -1
@@ -61,6 +83,34 @@
   ret i32 %a
 }
 
+define i32 @select_or3(i32 %x, i32 %y) {
+; CHECK-LABEL: select_or3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    cmovgel %esi, %eax
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11 ; signed compare: true when %x < 11
+  %s = select i1 %c, i32 -1, i32 0 ; arms swapped: all ones when %c is true, otherwise 0
+  %a = or i32 %y, %s ; the select feeds operand 1 of the 'or'
+  ret i32 %a
+}
+
+define <4 x i32> @select_or_v4(i32 %x, <4 x i32> %y) {
+; CHECK-LABEL: select_or_v4:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cmpl $11, %edi
+; CHECK-NEXT:    jl .LBB7_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
+; CHECK-NEXT:  .LBB7_2:
+; CHECK-NEXT:    retq
+  %c = icmp slt i32 %x, 11 ; scalar i1 condition shared by all four lanes
+  %s = select i1 %c, <4 x i32> zeroinitializer, <4 x i32><i32 -1, i32 -1, i32 -1, i32 -1> ; zero splat when %c is true, otherwise all-ones splat
+  %a = or <4 x i32> %s, %y ; the select feeds operand 0 of the vector 'or'
+  ret <4 x i32> %a
+}
+
 define i32 @sel_constants_sub_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: sel_constants_sub_constant_sel_constants:
 ; CHECK:       # %bb.0:
@@ -186,11 +236,11 @@
 ; CHECK-LABEL: fsub_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB13_1
+; CHECK-NEXT:    jne .LBB17_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB13_1:
+; CHECK-NEXT:  .LBB17_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3
@@ -202,11 +252,11 @@
 ; CHECK-LABEL: fdiv_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB14_1
+; CHECK-NEXT:    jne .LBB18_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB14_1:
+; CHECK-NEXT:  .LBB18_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3
@@ -218,11 +268,11 @@
 ; CHECK-LABEL: frem_constant_sel_constants:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    testb $1, %dil
-; CHECK-NEXT:    jne .LBB15_1
+; CHECK-NEXT:    jne .LBB19_1
 ; CHECK-NEXT:  # %bb.2:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB15_1:
+; CHECK-NEXT:  .LBB19_1:
 ; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, double -4.0, double 23.3