Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1877,16 +1877,16 @@ BinOpcode == ISD::FDIV || BinOpcode == ISD::FREM) && "Unexpected binary operator"); - // Bail out if any constants are opaque because we can't constant fold those. - SDValue C1 = BO->getOperand(1); - if (!isConstantOrConstantVector(C1, true) && - !isConstantFPBuildVectorOrConstantFP(C1)) - return SDValue(); - // Don't do this unless the old select is going away. We want to eliminate the // binary operator, not replace a binop with a select. // TODO: Handle ISD::SELECT_CC. + unsigned SelOpNo = 0; SDValue Sel = BO->getOperand(0); + if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) { + SelOpNo = 1; + Sel = BO->getOperand(1); + } + if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) return SDValue(); @@ -1900,19 +1900,36 @@ !isConstantFPBuildVectorOrConstantFP(CF)) return SDValue(); + // Bail out if any constants are opaque because we can't constant fold those. + // The exception is "and" and "or" with either 0 or -1 in which case we can + // propagate non constant operands into select. + bool CanFoldNonConst = false; + if (BinOpcode == ISD::AND || BinOpcode == ISD::OR) { + ConstantSDNode *CTN = cast(CT); + ConstantSDNode *CFN = cast(CF); + CanFoldNonConst = (CTN->isNullValue() || CTN->isAllOnesValue()) && + (CFN->isNullValue() || CFN->isAllOnesValue()); + } + + SDValue C1 = BO->getOperand(SelOpNo ^ 1); + if (!CanFoldNonConst && + !isConstantOrConstantVector(C1, true) && + !isConstantFPBuildVectorOrConstantFP(C1)) + return SDValue(); + // We have a select-of-constants followed by a binary operator with a // constant. Eliminate the binop by pulling the constant math into the select. // Example: add (select Cond, CT, CF), C1 --> select Cond, CT + C1, CF + C1 EVT VT = Sel.getValueType(); SDLoc DL(Sel); SDValue NewCT = DAG.getNode(BinOpcode, DL, VT, CT, C1); - if (!NewCT.isUndef() && + if (!CanFoldNonConst && !NewCT.isUndef() && !isConstantOrConstantVector(NewCT, true) && !isConstantFPBuildVectorOrConstantFP(NewCT)) return SDValue(); SDValue NewCF = DAG.getNode(BinOpcode, DL, VT, CF, C1); - if (!NewCF.isUndef() && + if (!CanFoldNonConst && !NewCF.isUndef() && !isConstantOrConstantVector(NewCF, true) && !isConstantFPBuildVectorOrConstantFP(NewCF)) return SDValue(); Index: test/CodeGen/AMDGPU/dagcombine-select.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/dagcombine-select.ll @@ -0,0 +1,49 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s + +; GCN-LABEL: {{^}}select_and1: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN-NOT: v_and_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_and1(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %y, %s + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_and2: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], 0, v{{[0-9]+}}, +; GCN-NOT: v_and_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_and2(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %s, %y + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_or1: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN-NOT: v_or_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_or1(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %y, %s + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} + +; GCN-LABEL: {{^}}select_or2: +; GCN: v_cndmask_b32_e32 [[SEL:v[0-9]+]], -1, v{{[0-9]+}}, +; GCN-NOT: v_or_b32 +; GCN: store_dword [[SEL]], +define amdgpu_kernel void @select_or2(i32 addrspace(1)* %p, i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %s, %y + store i32 %a, i32 addrspace(1)* %p, align 4 + ret void +} Index: test/CodeGen/AMDGPU/udivrem.ll =================================================================== --- test/CodeGen/AMDGPU/udivrem.ll +++ test/CodeGen/AMDGPU/udivrem.ll @@ -31,25 +31,25 @@ ; SI-DAG: v_mul_hi_u32 [[RCP_HI:v[0-9]+]], [[RCP]] ; SI-DAG: v_mul_lo_i32 [[RCP_LO:v[0-9]+]], [[RCP]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[NEG_RCP_LO:v[0-9]+]], vcc, 0, [[RCP_LO]] -; SI: v_cndmask_b32_e64 -; SI: v_mul_hi_u32 [[E:v[0-9]+]], {{v[0-9]+}}, [[RCP]] +; SI: v_cmp_eq_u32_e64 [[CC1:s\[[0-9:]+\]]], 0, [[RCP_HI]] +; SI: v_cndmask_b32_e64 [[CND1:v[0-9]+]], [[RCP_LO]], [[NEG_RCP_LO]], [[CC1]] +; SI: v_mul_hi_u32 [[E:v[0-9]+]], [[CND1]], [[RCP]] ; SI-DAG: v_add_{{[iu]}}32_e32 [[RCP_A_E:v[0-9]+]], vcc, [[E]], [[RCP]] ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[RCP_S_E:v[0-9]+]], vcc, [[E]], [[RCP]] -; SI: v_cndmask_b32_e64 -; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]] -; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]] +; SI: v_cndmask_b32_e64 [[CND2:v[0-9]+]], [[RCP_S_E]], [[RCP_A_E]], [[CC1]] +; SI: v_mul_hi_u32 [[Quotient:v[0-9]+]], [[CND2]], +; SI: v_mul_lo_i32 [[Num_S_Remainder:v[0-9]+]], [[CND2]] ; SI-DAG: v_add_{{[iu]}}32_e32 [[Quotient_A_One:v[0-9]+]], vcc, 1, [[Quotient]] ; SI-DAG: v_sub_{{[iu]}}32_e32 [[Remainder:v[0-9]+]], vcc, {{[vs][0-9]+}}, [[Num_S_Remainder]] ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Quotient_S_One:v[0-9]+]], ; SI-DAG: v_subrev_{{[iu]}}32_e32 [[Remainder_S_Den:v[0-9]+]], -; SI: v_and_b32_e32 [[Tmp1:v[0-9]+]] ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_add_{{[iu]}}32_e32 [[Remainder_A_Den:v[0-9]+]], ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) { %result0 = udiv i32 %x, %y @@ -124,8 +124,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -147,8 +145,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -157,6 +153,7 @@ ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem_v2(<2 x i32> addrspace(1)* %out, <2 x i32> %x, <2 x i32> %y) { %result0 = udiv <2 x i32> %x, %y @@ -274,8 +271,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -297,8 +292,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -320,8 +313,6 @@ ; SI-DAG: v_mul_lo_i32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_cndmask_b32_e64 -; SI-DAG: v_and_b32_e32 ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 @@ -339,6 +330,7 @@ ; SI-DAG: v_add_{{[iu]}}32_e32 ; SI-DAG: v_subrev_{{[iu]}}32_e32 ; SI-DAG: v_cndmask_b32_e64 +; SI-NOT: v_and_b32 ; SI: s_endpgm define amdgpu_kernel void @test_udivrem_v4(<4 x i32> addrspace(1)* %out, <4 x i32> %x, <4 x i32> %y) { %result0 = udiv <4 x i32> %x, %y Index: test/CodeGen/X86/dagcombine-select.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/dagcombine-select.ll @@ -0,0 +1,47 @@ +; RUN: llc -march=x86-64 -verify-machineinstrs < %s | FileCheck -enable-var-scope %s + +; CHECK-LABEL: {{^}}select_and1: +; CHECK: cmpl $11, %edi +; CHECK-NEXT: cmovgel %esi, %eax +; CHECK-NEXT: retq +define i32 @select_and1(i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %y, %s + ret i32 %a +} + +; CHECK-LABEL: {{^}}select_and2: +; CHECK: cmpl $11, %edi +; CHECK-NEXT: cmovgel %esi, %eax +; CHECK-NEXT: retq +define i32 @select_and2(i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = and i32 %s, %y + ret i32 %a +} + +; CHECK-LABEL: {{^}}select_or1: +; CHECK: cmpl $11, %edi +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: cmovll %esi, %eax +; CHECK-NEXT: retq +define i32 @select_or1(i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %y, %s + ret i32 %a +} + +; CHECK-LABEL: {{^}}select_or2: +; CHECK: cmpl $11, %edi +; CHECK-NEXT: movl $-1, %eax +; CHECK-NEXT: cmovll %esi, %eax +; CHECK-NEXT: retq +define i32 @select_or2(i32 %x, i32 %y) { + %c = icmp slt i32 %x, 11 + %s = select i1 %c, i32 0, i32 -1 + %a = or i32 %s, %y + ret i32 %a +}