# NOTE(review): this patch appears to have been recovered from a copy in which
# newlines were collapsed to spaces. Line breaks have been restored below and
# no tokens were added to or removed from the patch body. Leading indentation
# inside lines is a conventional reconstruction, not the verified original.
#
# Damage that cannot be recovered from this text alone:
#   * The `GINodeEquiv` lines in AMDGPUGISel.td carry no template arguments,
#     so the removed and added lines read identically.
#   * Blank context lines are missing: the line counts declared in the .td
#     hunk headers are larger than the number of lines shown in each hunk.
# Restore both from the original review before applying this with patch(1).
# The new-file hunk for the .mir test is complete (82 added lines).
Index: llvm/lib/Target/AMDGPU/AMDGPUGISel.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUGISel.td
+++ llvm/lib/Target/AMDGPU/AMDGPUGISel.td
@@ -140,7 +140,7 @@
 def : GINodeEquiv;
 def : GINodeEquiv;
-def : GINodeEquiv;
+def : GINodeEquiv;
 def : GINodeEquiv;
 def : GINodeEquiv;
Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -283,8 +283,8 @@
 def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
 def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
-def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
-def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
+def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>;
+def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>;
 def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>;
@@ -424,6 +424,10 @@
   [(int_amdgcn_sffbh node:$src),
    (AMDGPUffbh_i32_impl node:$src)]>;
+def AMDGPUffbh_u32 : PatFrags<(ops node:$src),
+  [(ctlz_zero_undef node:$src),
+   (AMDGPUffbh_u32_impl node:$src)]>;
+
 def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1),
   [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1),
    (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>;
Index: llvm/lib/Target/AMDGPU/SOPInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -220,7 +220,9 @@
   [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
 >;
-def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">;
+def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64",
+  [(set i32:$sdst, (AMDGPUffbh_u32 i64:$src0))]
+>;
 def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
   [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
 >;
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-ctlz-zero-undef.mir
@@ -0,0 +1,82 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: ctlz_zero_undef_s32_ss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s32_ss
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; CHECK: [[S_FLBIT_I32_B32_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B32 [[COPY]]
+    ; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B32_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctlz_zero_undef_s32_vs
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s32_vs
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
+    ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec
+    ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]]
+    %0:sgpr(s32) = COPY $sgpr0
+    %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctlz_zero_undef_s32_vv
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s32_vv
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK: [[V_FFBH_U32_e64_:%[0-9]+]]:vgpr_32 = V_FFBH_U32_e64 [[COPY]], implicit $exec
+    ; CHECK: S_ENDPGM 0, implicit [[V_FFBH_U32_e64_]]
+    %0:vgpr(s32) = COPY $vgpr0
+    %1:vgpr(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...
+
+---
+name: ctlz_zero_undef_s64_ss
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+
+    ; CHECK-LABEL: name: ctlz_zero_undef_s64_ss
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
+    ; CHECK: [[S_FLBIT_I32_B64_:%[0-9]+]]:sreg_32 = S_FLBIT_I32_B64 [[COPY]]
+    ; CHECK: S_ENDPGM 0, implicit [[S_FLBIT_I32_B64_]]
+    %0:sgpr(s64) = COPY $sgpr0_sgpr1
+    %1:sgpr(s32) = G_CTLZ_ZERO_UNDEF %0
+    S_ENDPGM 0, implicit %1
+...