AMDGPU: make the trunc-to-i1 selection patterns divergence-driven

Summary of what this patch contains:
  * SIInstructions.td: the three GCNPat patterns that select an i1 trunc of
    an i32/i16/i64 value are restricted to divergent nodes (through
    DivergentUnaryFrag<trunc>), and their AND is changed from S_AND_B32 to
    V_AND_B32_e64. The compare stays V_CMP_EQ_U32_e64.
  * VOPInstructions.td: adds DivergentUnaryFrag, a PatFrag wrapping a unary
    operator whose predicate is N->isDivergent(); its GISelPredicateCode
    always returns true.
  * New test divergence-driven-trunc-to-i1.ll: the CHECK lines expect
    S_AND_B32 / S_CMP_EQ_U32 in the uniform_* kernels and
    V_AND_B32_e64 / V_CMP_EQ_U32_e64 in the divergent_* functions. The
    selection of the uniform case is not part of this patch.

NOTE(review): this patch was restored from a copy in which line breaks,
indentation and TableGen template argument lists had been lost. The template
arguments on added/removed lines follow from the patch itself (the fragment
body uses `Op`; the replaced operator is `trunc`). Those on context lines
(list<dag>, VOPPatOrNull<...>, VOPPatGen<...>) and the exact whitespace of
all context lines are presumed -- confirm against the target tree, or apply
with whitespace-insensitive matching.

diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2212,18 +2212,18 @@
 >;
 
 def : GCNPat <
-  (i1 (trunc i32:$a)),
-  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+  (i1 (DivergentUnaryFrag<trunc> i32:$a)),
+  (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
 >;
 
 def : GCNPat <
-  (i1 (trunc i16:$a)),
-  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+  (i1 (DivergentUnaryFrag<trunc> i16:$a)),
+  (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
 >;
 
 def : GCNPat <
-  (i1 (trunc i64:$a)),
-  (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
+  (i1 (DivergentUnaryFrag<trunc> i64:$a)),
+  (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
                     (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
 >;
 
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -794,6 +794,18 @@
   list<dag> ret =  [!con(Outs, (set Ins))];
 }
 
+class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag <
+  (ops node:$src0),
+  (Op $src0),
+  [{ return N->isDivergent(); }]> {
+  // This check is unnecessary as it's captured by the result register
+  // bank constraint.
+  //
+  // FIXME: Should add a way for the emitter to recognize this is a
+  // trivially true predicate to eliminate the check.
+  let GISelPredicateCode = [{return true;}];
+}
+
 class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
   list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
 }
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: name: uniform_trunc_i16_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) {
+  %setcc = icmp slt i16 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  store i1 %select, i1 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i16_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) {
+  %setcc = icmp slt i16 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  ret i1 %select
+}
+
+; GCN-LABEL: name: uniform_trunc_i32_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) {
+  %setcc = icmp slt i32 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  store i1 %select, i1 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i32_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) {
+  %setcc = icmp slt i32 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  ret i1 %select
+}
+
+; GCN-LABEL: name: uniform_trunc_i64_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) {
+  %setcc = icmp slt i64 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  store i1 %select, i1 addrspace(1)* %out
+  ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i64_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) {
+  %setcc = icmp slt i64 %x, 0
+  %select = select i1 %setcc, i1 true, i1 %z
+  ret i1 %select
+}
+