diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -846,6 +846,8 @@ AMDGPUAS::CONSTANT_ADDRESS_32BIT) return true; return false; + case AMDGPUISD::SETCC: // ballot-style: the lane-mask result is uniform + return true; } return false; } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -71,21 +71,14 @@ ret i32 %ballot } -define amdgpu_cs float @ctpop_of_ballot(i32 %x, i32 %y) { +define amdgpu_cs i32 @ctpop_of_ballot(float %x, float %y) { ; CHECK-LABEL: ctpop_of_ballot: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc_lo, v0, v1 - -; TODO: This should use a scalar s_bcnt1 instruction. -; NOTE: The final mul is cruft to prevent a "bad VGPR to SGPR copy" error - -; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_lo, 0 -; CHECK-NEXT: v_mul_lo_u32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1 +; CHECK-NEXT: s_bcnt1_i32_b32 s0, vcc_lo ; CHECK-NEXT: ; return to shader part epilog - %cmp = icmp ugt i32 %x, %y + %cmp = fcmp ogt float %x, %y %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) %bcnt = call i32 @llvm.ctpop.i32(i32 %ballot) - %r.i = mul i32 %x, %bcnt - %r = bitcast i32 %r.i to float - ret float %r + ret i32 %bcnt } diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -74,23 +74,15 @@ ret i64 %ballot } -define amdgpu_cs float @ctpop_of_ballot(i32 %x, i32 %y) { +define amdgpu_cs i64 @ctpop_of_ballot(float %x, float %y) { ; CHECK-LABEL: ctpop_of_ballot: ; CHECK: ; %bb.0: -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1 - -; TODO: This should use a scalar s_bcnt1 
instruction. -; NOTE: The final mul is cruft to prevent a "bad VGPR to SGPR copy" error - -; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_lo, 0 -; CHECK-NEXT: v_bcnt_u32_b32 v1, vcc_hi, v1 -; CHECK-NEXT: v_mul_lo_u32 v0, v0, v1 +; CHECK-NEXT: v_cmp_gt_f32_e32 vcc, v0, v1 +; CHECK-NEXT: s_bcnt1_i32_b64 s0, vcc +; CHECK-NEXT: s_mov_b32 s1, 0 ; CHECK-NEXT: ; return to shader part epilog - %cmp = icmp ugt i32 %x, %y + %cmp = fcmp ogt float %x, %y %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) %bcnt = call i64 @llvm.ctpop.i64(i64 %ballot) - %bcnt.32 = trunc i64 %bcnt to i32 - %r.i = mul i32 %x, %bcnt.32 - %r = bitcast i32 %r.i to float - ret float %r + ret i64 %bcnt }