Index: llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td +++ llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td @@ -286,7 +286,7 @@ def AMDGPUffbh_u32_impl : SDNode<"AMDGPUISD::FFBH_U32", SDTIntBitCountUnaryOp>; def AMDGPUffbh_i32_impl : SDNode<"AMDGPUISD::FFBH_I32", SDTIntBitCountUnaryOp>; -def AMDGPUffbl_b32 : SDNode<"AMDGPUISD::FFBL_B32", SDTIntUnaryOp>; +def AMDGPUffbl_b32_impl : SDNode<"AMDGPUISD::FFBL_B32", SDTIntBitCountUnaryOp>; // Signed and unsigned 24-bit multiply. The highest 8-bits are ignore // when performing the mulitply. The result is a 32-bit value. @@ -428,6 +428,10 @@ [(ctlz_zero_undef node:$src), (AMDGPUffbh_u32_impl node:$src)]>; +def AMDGPUffbl_b32 : PatFrags<(ops node:$src), + [(cttz_zero_undef node:$src), + (AMDGPUffbl_b32_impl node:$src)]>; + def AMDGPUpkrtz_f16_f32 : PatFrags<(ops node:$src0, node:$src1), [(int_amdgcn_cvt_pkrtz node:$src0, node:$src1), (AMDGPUpkrtz_f16_f32_impl node:$src0, node:$src1)]>; Index: llvm/lib/Target/AMDGPU/SOPInstructions.td =================================================================== --- llvm/lib/Target/AMDGPU/SOPInstructions.td +++ llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -210,7 +210,9 @@ def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">; def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">; -def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">; +def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64", + [(set i32:$sdst, (AMDGPUffbl_b32 i64:$src0))] +>; def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32", [(set i32:$sdst, (AMDGPUffbl_b32 i32:$src0))] Index: llvm/lib/Target/AMDGPU/VOP1Instructions.td =================================================================== --- llvm/lib/Target/AMDGPU/VOP1Instructions.td +++ llvm/lib/Target/AMDGPU/VOP1Instructions.td @@ -241,7 +241,7 @@ defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>; defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32, bitreverse>; defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32, AMDGPUffbh_u32>; -defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>; +defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32, AMDGPUffbl_b32>; defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32, AMDGPUffbh_i32>; let SchedRW = [WriteDoubleAdd] in { Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-cttz-zero-undef.mir @@ -0,0 +1,82 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s + +--- +name: cttz_zero_undef_s32_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: cttz_zero_undef_s32_ss + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[S_FF1_I32_B32_:%[0-9]+]]:sreg_32 = S_FF1_I32_B32 [[COPY]] + ; CHECK: S_ENDPGM 0, implicit [[S_FF1_I32_B32_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: cttz_zero_undef_s32_vs +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: cttz_zero_undef_s32_vs + ; CHECK: liveins: $sgpr0 + ; CHECK: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0 + ; CHECK: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] + %0:sgpr(s32) = COPY $sgpr0 + %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: cttz_zero_undef_s32_vv +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: cttz_zero_undef_s32_vv + ; CHECK: liveins: $vgpr0 + ; CHECK: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK: [[V_FFBL_B32_e64_:%[0-9]+]]:vgpr_32 = V_FFBL_B32_e64 [[COPY]], implicit $exec + ; CHECK: S_ENDPGM 0, implicit [[V_FFBL_B32_e64_]] + %0:vgpr(s32) = COPY $vgpr0 + %1:vgpr(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: cttz_zero_undef_s64_ss +legalized: true +regBankSelected: true +tracksRegLiveness: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + + ; CHECK-LABEL: name: cttz_zero_undef_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1 + ; CHECK: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1 + ; CHECK: [[S_FF1_I32_B64_:%[0-9]+]]:sreg_32 = S_FF1_I32_B64 [[COPY]] + ; CHECK: S_ENDPGM 0, implicit [[S_FF1_I32_B64_]] + %0:sgpr(s64) = COPY $sgpr0_sgpr1 + %1:sgpr(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +...