[AMDGPU] Select i64 setcc (ballot.i64) in wave32 with a zeroed high half

In wave32 the VOPC compare result only fills sub0 of the 64-bit value built
by REG_SEQUENCE. Leaving sub1 as IMPLICIT_DEF makes the upper 32 bits of the
i64 mask undefined, so consumers of the full 64-bit value (for example the
s_bcnt1_i32_b64 in ctpop_of_ballot, or a plain i64 return) read garbage.
Both i64 patterns therefore write an explicit zero into sub1 with S_MOV_B32.

NOTE(review): the added "s_mov_b32 s1, 0" CHECK lines were written by hand;
their position relative to the v_cmp is a best guess. Regenerate the test
with utils/update_llc_test_checks.py before committing.

diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -992,11 +992,19 @@
     (i64 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in
-  def : GCNPat <
-    (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
-    (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
-  >;
+  let WaveSizePredicate = isWave32 in {
+    def : GCNPat <
+      (i32 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+      (i32 (COPY_TO_REGCLASS (inst $src0, $src1), SReg_32))
+    >;
+
+    // Support codegen of i64 setcc in wave32 mode. The compare result only
+    // fills sub0, so sub1 is explicitly zeroed rather than left undefined.
+    def : GCNPat <
+      (i64 (AMDGPUsetcc vt:$src0, vt:$src1, cond)),
+      (i64 (REG_SEQUENCE SReg_64, (inst $src0, $src1), sub0, (S_MOV_B32 (i32 0)), sub1))
+    >;
+  }
 }
 
 defm : ICMP_Pattern ;
@@ -1056,13 +1064,24 @@
                            DSTCLAMP.NONE), SReg_64))
   >;
 
-  let WaveSizePredicate = isWave32 in
-  def : GCNPat <
-    (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
-                      (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
-    (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
-                           DSTCLAMP.NONE), SReg_32))
-  >;
+  let WaveSizePredicate = isWave32 in {
+    def : GCNPat <
+      (i32 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+      (i32 (COPY_TO_REGCLASS (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                             DSTCLAMP.NONE), SReg_32))
+    >;
+
+    // Support codegen of i64 setcc in wave32 mode. The compare result only
+    // fills sub0, so sub1 is explicitly zeroed rather than left undefined.
+    def : GCNPat <
+      (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+                        (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+      (i64 (REG_SEQUENCE SReg_64, (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+                                   DSTCLAMP.NONE), sub0,
+                         (S_MOV_B32 (i32 0)), sub1))
+    >;
+  }
 }
 
 defm : FCMP_Pattern ;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.wave32.ll
@@ -0,0 +1,93 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck %s
+
+declare i64 @llvm.amdgcn.ballot.i64(i1)
+declare i64 @llvm.ctpop.i64(i64)
+
+; Test ballot(0)
+
+define amdgpu_cs i64 @constant_false() {
+; CHECK-LABEL: constant_false:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
+  ret i64 %ballot
+}
+
+; Test ballot(1)
+
+define amdgpu_cs i64 @constant_true() {
+; CHECK-LABEL: constant_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    s_mov_b32 s1, exec_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
+  ret i64 %ballot
+}
+
+; Test ballot of a non-comparison operation
+
+define amdgpu_cs i64 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %trunc = trunc i32 %x to i1
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
+  ret i64 %ballot
+}
+
+; Test ballot of comparisons
+
+define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp eq i32 %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp sge i32 %x, 99
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) {
+; CHECK-LABEL: ctpop_of_ballot:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    s_bcnt1_i32_b64 s0, s[0:1]
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot)
+  ret i64 %bcnt
+}