Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1901,8 +1901,19 @@ return SDValue(); // Bail out if any constants are opaque because we can't constant fold those. + // The exception is "and" and "or" with either 0 or -1 in which case we can + // propagate non constant operands into select. + bool CanFoldNonConst = false; + if (BinOpcode == ISD::AND || BinOpcode == ISD::OR) { + ConstantSDNode *CTN = cast(CT); + ConstantSDNode *CFN = cast(CF); + CanFoldNonConst = (CTN->isNullValue() || CTN->isAllOnesValue()) && + (CFN->isNullValue() || CFN->isAllOnesValue()); + } + SDValue CBO = BO->getOperand(SelOpNo ^ 1); - if (!isConstantOrConstantVector(CBO, true) && + if (!CanFoldNonConst && + !isConstantOrConstantVector(CBO, true) && !isConstantFPBuildVectorOrConstantFP(CBO)) return SDValue(); @@ -1923,14 +1934,14 @@ SDLoc DL(Sel); SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT) : DAG.getNode(BinOpcode, DL, VT, CT, CBO); - if (!NewCT.isUndef() && + if (!CanFoldNonConst && !NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && !isConstantFPBuildVectorOrConstantFP(NewCT)) return SDValue(); SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF) : DAG.getNode(BinOpcode, DL, VT, CF, CBO); - if (!NewCF.isUndef() && + if (!CanFoldNonConst && !NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && !isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); Index: test/CodeGen/AMDGPU/dagcombine-select.ll =================================================================== --- test/CodeGen/AMDGPU/dagcombine-select.ll +++ test/CodeGen/AMDGPU/dagcombine-select.ll @@ -1,5 +1,53 @@ ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s +; GCN-LABEL: {{^}}select_and1: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN-NOT: v_and_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %y, %s + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_and2: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN-NOT: v_and_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %s, %y + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_or1: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN-NOT: v_or_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %y, %s + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_or2: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN-NOT: v_or_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %s, %y + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + ; GCN-LABEL: {{^}}sel_constants_sub_constant_sel_constants: ; v_cndmask_b32_e64 v{{[0-9]+}}, 2, 9, define amdgpu_kernel void @sel_constants_sub_constant_sel_constants(i32 addrspace(1)* %p, i1 %cond) { Index: test/CodeGen/AMDGPU/udivrem.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem.ll +++ test/CodeGen/AMDGPU/udivrem.ll @@ -31,25 +31,25 @@ ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]] ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]] -; SI: v_cndmask_b32_e64 -; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]] +; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]] +; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]] +; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]] ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]] ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]] -; SI: v_cndmask_b32_e64 -; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]] -; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]] +; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]] +; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]], +; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]] ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]] ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]], ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]], -; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]], ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { %result0 = udiv i32 %x, %y @@ -124,8 +124,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -147,8 +145,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -157,6 +153,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { %result0 = udiv <2 x i32> %x, %y @@ -274,8 +271,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -297,8 +292,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -320,8 +313,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -339,6 +330,7 @@ ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { %result0 = udiv <4 x i32> %x, %y Index: test/CodeGen/X86/dagcombine-select.ll =================================================================== --- test/CodeGen/X86/dagcombine-select.ll +++ test/CodeGen/X86/dagcombine-select.ll @@ -6,9 +6,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: setl %al -; CHECK-NEXT: decl %eax -; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: cmovgel %esi, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, i32 0, i32 -1 @@ -21,9 +19,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: setl %al -; CHECK-NEXT: decl %eax -; CHECK-NEXT: andl %esi, %eax +; CHECK-NEXT: cmovgel %esi, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, i32 0, i32 -1 @@ -34,11 +30,9 @@ define i32 @select_or1(i32 %x, i32 %y) { ; CHECK-LABEL: select_or1: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: setl %al -; CHECK-NEXT: decl %eax -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: cmovll %esi, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, i32 0, i32 -1 @@ -49,11 +43,9 @@ define i32 @select_or2(i32 %x, i32 %y) { ; CHECK-LABEL: select_or2: ; CHECK: # %bb.0: -; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: cmpl $11, %edi -; CHECK-NEXT: setl %al -; CHECK-NEXT: decl %eax -; CHECK-NEXT: orl %esi, %eax +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: cmovll %esi, %eax ; CHECK-NEXT: retq %c = icmp slt i32 %x, 11 %s = select i1 %c, i32 0, i32 -1