Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5065,6 +5065,10 @@
                        Denominator, Numerator);
   }
   case Intrinsic::amdgcn_icmp: {
+    // An i1 (ne %src, false) comparison is the identity: forward the i1
+    // source unchanged and let the ISel pattern below copy the SGPR pair.
+    if (Op.getOperand(1).getValueType() == MVT::i1 &&
+        Op.getConstantOperandVal(2) == 0 &&
+        Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+      return Op;
     return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
   }
   case Intrinsic::amdgcn_fcmp: {
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -597,6 +597,11 @@
 >;
 
 // TODO: we could add more variants for other types of conditionals
+def : Pat <
+  // 33 is ICmpInst::ICMP_NE; matches the i1 (ne %src, false) form kept
+  // un-lowered in SIISelLowering.
+  (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+  (COPY $src) // Return the SGPRs representing i1 src
+>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3652,6 +3652,11 @@
     // Promote to next legal integer type.
     unsigned Width = CmpType->getBitWidth();
     unsigned NewWidth = Width;
+
+    // Don't do anything for i1 comparisons; the backend now handles the
+    // i1 form of the intrinsic directly.
+    if (Width == 1)
+      break;
+
     if (Width <= 16)
       NewWidth = 16;
     else if (Width <= 32)
Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
 ; No crash on invalid input
 ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+  %c0 = icmp ugt i32 %a, 1
+  %c1 = icmp ugt i32 %b, 2
+  %src = and i1 %c0, %c1
+  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }