Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -7345,23 +7345,54 @@
     }
   }
 
-  if (CRHS && VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
-      isBoolSGPR(LHS.getOperand(0))) {
-    // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
-    // setcc (sext from i1 cc), -1, eq|sle|uge) => cc
-    // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
-    // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
-    if ((CRHS->isAllOnesValue() &&
-         (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
-        (CRHS->isNullValue() &&
-         (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
-      return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
-                         DAG.getConstant(-1, SL, MVT::i1));
-    if ((CRHS->isAllOnesValue() &&
-         (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
-        (CRHS->isNullValue() &&
-         (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
-      return LHS.getOperand(0);
+  if (CRHS) {
+    if (VT == MVT::i32 && LHS.getOpcode() == ISD::SIGN_EXTEND &&
+        isBoolSGPR(LHS.getOperand(0))) {
+      // setcc (sext from i1 cc), -1, ne|sgt|ult) => not cc => xor cc, -1
+      // setcc (sext from i1 cc), -1, eq|sle|uge) => cc
+      // setcc (sext from i1 cc), 0, eq|sge|ule) => not cc => xor cc, -1
+      // setcc (sext from i1 cc), 0, ne|ugt|slt) => cc
+      if ((CRHS->isAllOnesValue() &&
+           (CC == ISD::SETNE || CC == ISD::SETGT || CC == ISD::SETULT)) ||
+          (CRHS->isNullValue() &&
+           (CC == ISD::SETEQ || CC == ISD::SETGE || CC == ISD::SETULE)))
+        return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+                           DAG.getConstant(-1, SL, MVT::i1));
+      if ((CRHS->isAllOnesValue() &&
+           (CC == ISD::SETEQ || CC == ISD::SETLE || CC == ISD::SETUGE)) ||
+          (CRHS->isNullValue() &&
+           (CC == ISD::SETNE || CC == ISD::SETUGT || CC == ISD::SETLT)))
+        return LHS.getOperand(0);
+    }
+
+    // Compare the constants as APInt: getZExtValue() asserts when a value
+    // does not fit in 64 bits (e.g. a wide i128 constant).
+    const APInt &CRHSVal = CRHS->getAPIntValue();
+    if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
+        LHS.getOpcode() == ISD::SELECT &&
+        isa<ConstantSDNode>(LHS.getOperand(1)) &&
+        isa<ConstantSDNode>(LHS.getOperand(2)) &&
+        isBoolSGPR(LHS.getOperand(0))) {
+      const APInt &CT =
+          cast<ConstantSDNode>(LHS.getOperand(1))->getAPIntValue();
+      const APInt &CF =
+          cast<ConstantSDNode>(LHS.getOperand(2))->getAPIntValue();
+
+      // Given CT != CF:
+      // setcc (select cc, CT, CF), CF, eq => xor cc, -1
+      // setcc (select cc, CT, CF), CF, ne => cc
+      // setcc (select cc, CT, CF), CT, ne => xor cc, -1
+      // setcc (select cc, CT, CF), CT, eq => cc
+      if (CT != CF) {
+        if ((CF == CRHSVal && CC == ISD::SETEQ) ||
+            (CT == CRHSVal && CC == ISD::SETNE))
+          return DAG.getNode(ISD::XOR, SL, MVT::i1, LHS.getOperand(0),
+                             DAG.getConstant(-1, SL, MVT::i1));
+        if ((CF == CRHSVal && CC == ISD::SETNE) ||
+            (CT == CRHSVal && CC == ISD::SETEQ))
+          return LHS.getOperand(0);
+      }
+    }
   }
 
   if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
Index: test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AMDGPU/dagcombine-setcc-select.ll
@@ -0,0 +1,109 @@
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -O0 < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
+
+; Check that setcc (select cc, CT, CF), C is folded to cc or its negation when
+; C matches one of the two distinct select constants.
+
+; GCN-LABEL: {{^}}eq_t:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @eq_t(float %x) {
+  %c1 = fcmp olt float %x, 1.0
+  %s1 = select i1 %c1, i32 56789, i32 1
+  %c2 = icmp eq i32 %s1, 56789
+  %s2 = select i1 %c2, float 4.0, float 2.0
+  store float %s2, float* undef, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}ne_t:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[FOUR]], [[TWO]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @ne_t(float %x) {
+  %c1 = fcmp olt float %x, 1.0
+  %s1 = select i1 %c1, i32 56789, i32 1
+  %c2 = icmp ne i32 %s1, 56789
+  %s2 = select i1 %c2, float 4.0, float 2.0
+  store float %s2, float* undef, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}eq_f:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[FOUR]], [[TWO]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @eq_f(float %x) {
+  %c1 = fcmp olt float %x, 1.0
+  %s1 = select i1 %c1, i32 1, i32 56789
+  %c2 = icmp eq i32 %s1, 56789
+  %s2 = select i1 %c2, float 4.0, float 2.0
+  store float %s2, float* undef, align 4
+  ret void
+}
+
+; GCN-LABEL: {{^}}ne_f:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN: v_cmp_lt_f32_e{{32|64}} [[CC:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-NOT: 0xddd5
+; GCN-NOT: v_cndmask_b32
+; GCN-NOT: v_cmp_eq_u32
+; GCN-NOT: v_cndmask_b32
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @ne_f(float %x) {
+  %c1 = fcmp olt float %x, 1.0
+  %s1 = select i1 %c1, i32 1, i32 56789
+  %c2 = icmp ne i32 %s1, 56789
+  %s2 = select i1 %c2, float 4.0, float 2.0
+  store float %s2, float* undef, align 4
+  ret void
+}
+
+; Negative test: the compared constant matches neither select operand, so both
+; the inner select and the integer compare are expected to remain.
+; GCN-LABEL: {{^}}different_constants:
+; GCN-DAG: s_load_dword [[X:s[0-9]+]]
+; GCN-DAG: v_mov_b32_e32 [[ONE:v[0-9]+]], 1.0
+; GCN-DAG: v_cmp_lt_f32_e{{32|64}} [[CC1:s\[[0-9]+:[0-9]+\]|vcc]], [[X]], [[ONE]]{{$}}
+; GCN-DAG: v_cndmask_b32_e{{32|64}} [[CND1:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, [[CC1]]
+; GCN-DAG: v_cmp_eq_u32_e{{32|64}} [[CC2:s\[[0-9]+:[0-9]+\]|vcc]], v{{[0-9]+}}, v{{[0-9]+}}{{$}}
+; GCN-DAG: v_mov_b32_e32 [[TWO:v[0-9]+]], 2.0
+; GCN-DAG: v_mov_b32_e32 [[FOUR:v[0-9]+]], 4.0
+; GCN: v_cndmask_b32_e{{32|64}} [[RES:v[0-9]+]], [[TWO]], [[FOUR]], [[CC2]]
+; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
+define amdgpu_kernel void @different_constants(float %x) {
+  %c1 = fcmp olt float %x, 1.0
+  %s1 = select i1 %c1, i32 56789, i32 1
+  %c2 = icmp eq i32 %s1, 5678
+  %s2 = select i1 %c2, float 4.0, float 2.0
+  store float %s2, float* undef, align 4
+  ret void
+}