Index: include/llvm/IR/IntrinsicsAMDGPU.td =================================================================== --- include/llvm/IR/IntrinsicsAMDGPU.td +++ include/llvm/IR/IntrinsicsAMDGPU.td @@ -174,6 +174,11 @@ [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem] >; +// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz +// should be used. +def int_amdgcn_sffbh : + Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + // TODO: Do we want an ordering for these? def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty], [llvm_anyptr_ty, LLVMMatchType<0>], Index: lib/Target/AMDGPU/AMDGPUISelLowering.h =================================================================== --- lib/Target/AMDGPU/AMDGPUISelLowering.h +++ lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -266,6 +266,7 @@ BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. FFBH_U32, // ctlz with -1 if input is zero. + FFBH_I32, MUL_U24, MUL_I24, MAD_U24, Index: lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -202,6 +202,7 @@ def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>; +def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>; // Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when // performing the mulitply. The result is a 32-bit value. Index: lib/Target/AMDGPU/AMDGPUIntrinsics.td =================================================================== --- lib/Target/AMDGPU/AMDGPUIntrinsics.td +++ lib/Target/AMDGPU/AMDGPUIntrinsics.td @@ -16,6 +16,8 @@ def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; + + // Deprecated in favor of llvm.amdgcn.sffbh def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; // Deprecated in favor of separate int_amdgcn_cube* intrinsics. Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -1910,6 +1910,9 @@ return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0, Denominator, Numerator); } + case Intrinsic::amdgcn_sffbh: + case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name. + return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1)); default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); } Index: lib/Target/AMDGPU/SIInstructions.td =================================================================== --- lib/Target/AMDGPU/SIInstructions.td +++ lib/Target/AMDGPU/SIInstructions.td @@ -139,7 +139,7 @@ defm S_FLBIT_I32_B64 : SOP1_32_64 , "s_flbit_i32_b64", []>; defm S_FLBIT_I32 : SOP1_32 , "s_flbit_i32", - [(set i32:$sdst, (int_AMDGPU_flbit_i32 i32:$src0))] + [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))] >; defm S_FLBIT_I32_I64 : SOP1_32_64 , "s_flbit_i32_i64", []>; defm S_SEXT_I32_I8 : SOP1_32 , "s_sext_i32_i8", Index: test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll =================================================================== --- test/CodeGen/AMDGPU/llvm.AMDGPU.flbit.i32.ll +++ /dev/null @@ -1,28 +0,0 @@ -; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s -; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s - -declare i32 @llvm.AMDGPU.flbit.i32(i32) nounwind readnone - -; FUNC-LABEL: {{^}}s_flbit: -; SI: s_load_dword [[VAL:s[0-9]+]], -; SI: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] -; SI: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] -; SI: buffer_store_dword [[VRESULT]], -; SI: s_endpgm -define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind { - %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone - store i32 %r, i32 addrspace(1)* %out, align 4 - ret void -} - -; FUNC-LABEL: {{^}}v_flbit: -; SI: buffer_load_dword [[VAL:v[0-9]+]], -; SI: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] -; SI: buffer_store_dword [[RESULT]], -; SI: s_endpgm -define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { - %val = load i32, i32 addrspace(1)* %valptr, align 4 - %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone - store i32 %r, i32 addrspace(1)* %out, align 4 - ret void -} Index: test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/llvm.amdgcn.sffbh.ll @@ -0,0 +1,54 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +declare i32 @llvm.amdgcn.sffbh.i32(i32) #1 +declare i32 @llvm.AMDGPU.flbit.i32(i32) #1 + +; FUNC-LABEL: {{^}}s_flbit: +; GCN: s_load_dword [[VAL:s[0-9]+]], +; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +define void @s_flbit(i32 addrspace(1)* noalias %out, i32 %val) #0 { + %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}v_flbit: +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +define void @v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) #0 { + %val = load i32, i32 addrspace(1)* %valptr, align 4 + %r = call i32 @llvm.amdgcn.sffbh.i32(i32 %val) + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}legacy_s_flbit: +; GCN: s_load_dword [[VAL:s[0-9]+]], +; GCN: s_flbit_i32 [[SRESULT:s[0-9]+]], [[VAL]] +; GCN: v_mov_b32_e32 [[VRESULT:v[0-9]+]], [[SRESULT]] +; GCN: buffer_store_dword [[VRESULT]], +; GCN: s_endpgm +define void @legacy_s_flbit(i32 addrspace(1)* noalias %out, i32 %val) nounwind { + %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +; FUNC-LABEL: {{^}}legacy_v_flbit: +; GCN: buffer_load_dword [[VAL:v[0-9]+]], +; GCN: v_ffbh_i32_e32 [[RESULT:v[0-9]+]], [[VAL]] +; GCN: buffer_store_dword [[RESULT]], +; GCN: s_endpgm +define void @legacy_v_flbit(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind { + %val = load i32, i32 addrspace(1)* %valptr, align 4 + %r = call i32 @llvm.AMDGPU.flbit.i32(i32 %val) nounwind readnone + store i32 %r, i32 addrspace(1)* %out, align 4 + ret void +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone }