# Reviewer commentary (not part of the patch; it precedes the first
# "diff --git" header).
#
# llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
#   * Before: `Opc` was computed once, ahead of both `if`s, always from
#     RHS.getOpcode().
#   * After: each branch computes its own `Opc`.
#       - SETEQ branch (LHS is -1, RHS is the ctlz/cttz node): picks
#         FFBL_B32 vs FFBH_U32 from RHS.getOpcode().
#       - SETNE branch (RHS is -1, LHS is the ctlz/cttz node): picks it
#         from LHS.getOpcode(), and the guard now tests
#         isCttzOpc(LHS.getOpcode()) instead of isCttzOpc(RHS.getOpcode()).
#
# llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
#   * v_cttz_i32_sel_eq_neg1 and v_cttz_i32_sel_ne_neg1: the SI checks now
#     capture the v_ffbl_b32 destination register and expect it to be the
#     value passed to buffer_store_dword; the v_cmp_ne_u32 check is removed.
#
# llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll (new file)
#   * Kernel computing `select (icmp ne %v, 0), -1, cttz(lshr 1, %v)` and
#     feeding the result to llvm.amdgcn.sffbh. The expected GCN output
#     still contains s_ff1_i32_b32 followed by an explicit v_cmp_ne_u32 /
#     v_cndmask_b32 pair.
#
# NOTE(review): this copy of the patch had its line breaks collapsed; the
# one-record-per-line layout below was restored so that it matches the hunk
# headers (24/24, 8/8, 8/8 and 0/42 lines). Exact indentation of context
# and removed lines could not be recovered from the collapsed text -
# confirm against the target files (or apply with whitespace ignored).
# NOTE(review): the first context line reads `cast(Cond.getOperand(2))`;
# it looks like a template argument was lost during extraction - TODO
# confirm against the upstream file before applying.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3523,24 +3523,24 @@
   ISD::CondCode CCOpcode = cast(Cond.getOperand(2))->get();
   SDValue CmpLHS = Cond.getOperand(0);
 
-  unsigned Opc = isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 :
-                                              AMDGPUISD::FFBH_U32;
-
   // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
   // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x
   if (CCOpcode == ISD::SETEQ &&
       (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
-      RHS.getOperand(0) == CmpLHS &&
-      isNegativeOne(LHS)) {
+      RHS.getOperand(0) == CmpLHS && isNegativeOne(LHS)) {
+    unsigned Opc =
+        isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
   }
 
   // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
   // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x
   if (CCOpcode == ISD::SETNE &&
-      (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) &&
-      LHS.getOperand(0) == CmpLHS &&
-      isNegativeOne(RHS)) {
+      (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) &&
+      LHS.getOperand(0) == CmpLHS && isNegativeOne(RHS)) {
+    unsigned Opc =
+        isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32;
+
     return getFFBX_U32(DAG, CmpLHS, SL, Opc);
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
--- a/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
+++ b/llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
@@ -198,8 +198,8 @@
 }
 
 ; FUNC-LABEL: {{^}}v_cttz_i32_sel_eq_neg1:
-; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]]
-; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
+; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}}
+; SI: buffer_store_dword [[VAL]],
 ; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW
 ; EG: FFBL_INT
@@ -213,8 +213,8 @@
 }
 
 ; FUNC-LABEL: {{^}}v_cttz_i32_sel_ne_neg1:
-; SI: v_ffbl_b32_e32 v{{[0-9]+}}, [[VAL:v[0-9]+]]
-; SI: v_cmp_ne_u32_e32 vcc, 0, [[VAL]]
+; SI: v_ffbl_b32_e32 [[VAL:v[0-9]+]], v{{[0-9]+}}
+; SI: buffer_store_dword [[VAL]],
 ; SI: s_endpgm
 ; EG: MEM_RAT_CACHELESS STORE_RAW
 ; EG: FFBL_INT
diff --git a/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/select-constant-cttz.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -verify-machineinstrs -o - %s | FileCheck -check-prefix=GCN %s
+
+declare i32 @llvm.cttz.i32(i32, i1) nounwind readnone
+declare i32 @llvm.amdgcn.sffbh.i32(i32) nounwind readnone speculatable
+define amdgpu_kernel void @select_constant_cttz(i32 addrspace(1)* noalias %out, i32 addrspace(1)* nocapture readonly %arrayidx) nounwind {
+; GCN-LABEL: select_constant_cttz:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_load_dword s8, s[2:3], 0x0
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshr_b32 s0, 1, s8
+; GCN-NEXT: s_ff1_i32_b32 s0, s0
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: v_cmp_ne_u32_e64 s[2:3], s8, 0
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[2:3]
+; GCN-NEXT: v_ffbh_i32_e32 v1, v0
+; GCN-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v0
+; GCN-NEXT: v_sub_i32_e32 v0, vcc, 31, v1
+; GCN-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
+; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -1, s[0:1]
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+  %v = load i32, i32 addrspace(1)* %arrayidx, align 4
+  %sr = lshr i32 1, %v
+  %cmp = icmp ne i32 %v, 0
+  %cttz = call i32 @llvm.cttz.i32(i32 %sr, i1 true), !range !0
+  %sel = select i1 %cmp, i32 -1, i32 %cttz
+  %ffbh = call i32 @llvm.amdgcn.sffbh.i32(i32 %sel)
+  %sub = sub i32 31, %ffbh
+  %cmp2 = icmp eq i32 %sel, 0
+  %or = or i1 %cmp, %cmp2
+  %sel2 = select i1 %or, i32 -1, i32 %sub
+  store i32 %sel2, i32 addrspace(1)* %out
+  ret void
+}
+
+!0 = !{i32 0, i32 33}