Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1028,8 +1028,10 @@ if (TypeIdx == 1) switch (MI.getOpcode()) { case TargetOpcode::G_CTLZ: + case TargetOpcode::G_CTLZ_ZERO_UNDEF: return narrowScalarCTLZ(MI, TypeIdx, NarrowTy); case TargetOpcode::G_CTTZ: + case TargetOpcode::G_CTTZ_ZERO_UNDEF: return narrowScalarCTTZ(MI, TypeIdx, NarrowTy); case TargetOpcode::G_CTPOP: return narrowScalarCTPOP(MI, TypeIdx, NarrowTy); @@ -3985,13 +3987,17 @@ unsigned NarrowSize = NarrowTy.getSizeInBits(); if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF; + MachineIRBuilder &B = MIRBuilder; auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi) auto C_0 = B.buildConstant(NarrowTy, 0); auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), UnmergeSrc.getReg(1), C_0); - auto LoCTLZ = B.buildCTLZ(DstTy, UnmergeSrc.getReg(0)); + auto LoCTLZ = IsUndef ? + B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) : + B.buildCTLZ(DstTy, UnmergeSrc.getReg(0)); auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize); auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)); @@ -4017,13 +4023,17 @@ unsigned NarrowSize = NarrowTy.getSizeInBits(); if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF; + MachineIRBuilder &B = MIRBuilder; auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg); // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo) auto C_0 = B.buildConstant(NarrowTy, 0); auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), UnmergeSrc.getReg(0), C_0); - auto HiCTTZ = B.buildCTTZ(DstTy, UnmergeSrc.getReg(1)); + auto HiCTTZ = IsUndef ? + B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) : + B.buildCTTZ(DstTy, UnmergeSrc.getReg(1)); auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize); auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize); auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)); Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2083,7 +2083,9 @@ MI.eraseFromParent(); return; } - case AMDGPU::G_CTPOP: { + case AMDGPU::G_CTPOP: + case AMDGPU::G_CTLZ_ZERO_UNDEF: + case AMDGPU::G_CTTZ_ZERO_UNDEF: { MachineIRBuilder B(MI); MachineFunction &MF = B.getMF(); @@ -2103,7 +2105,7 @@ LegalizerHelper Helper(MF, Observer, B); if (Helper.narrowScalar(MI, 1, S32) != LegalizerHelper::Legalized) - llvm_unreachable("widenScalar should have succeeded"); + llvm_unreachable("narrowScalar should have succeeded"); return; } case AMDGPU::G_SEXT: @@ -3203,9 +3205,7 @@ OpdsMapping[0] = OpdsMapping[1] = AMDGPU::getValueMapping(BankID, Size); break; } - case AMDGPU::G_CTLZ: case AMDGPU::G_CTLZ_ZERO_UNDEF: - case AMDGPU::G_CTTZ: case AMDGPU::G_CTTZ_ZERO_UNDEF: case AMDGPU::G_CTPOP: { unsigned Size = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-ctlz-zero-undef.mir @@ -3,29 +3,72 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: ctlz_zero_undef_i32_s +name: ctlz_zero_undef_s32_s legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: name: ctlz_zero_undef_i32_s + ; CHECK-LABEL: name: ctlz_zero_undef_s32_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 ... --- -name: ctlz_zero_undef_i32_v +name: ctlz_zero_undef_s32_v legalized: true body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: ctlz_zero_undef_i32_v + ; CHECK-LABEL: name: ctlz_zero_undef_s32_v ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctlz_zero_undef_s64_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: ctlz_zero_undef_s64_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTLZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: S_ENDPGM 0, implicit [[CTLZ_ZERO_UNDEF]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: ctlz_zero_undef_s64_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: ctlz_zero_undef_s64_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV1]](s32), [[C]] + ; CHECK: [[CTLZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32 + ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTLZ_ZERO_UNDEF]], [[C1]] + ; CHECK: [[CTLZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTLZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTLZ_ZERO_UNDEF1]] + ; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTLZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 ... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-cttz-zero-undef.mir @@ -3,29 +3,72 @@ # RUN: llc -march=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s --- -name: cttz_zero_undef_i32_s +name: cttz_zero_undef_s32_s legalized: true body: | bb.0: liveins: $sgpr0 - ; CHECK-LABEL: name: cttz_zero_undef_i32_s + ; CHECK-LABEL: name: cttz_zero_undef_s32_s ; CHECK: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) %0:_(s32) = COPY $sgpr0 %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 ... --- -name: cttz_zero_undef_i32_v +name: cttz_zero_undef_s32_v legalized: true body: | bb.0: liveins: $vgpr0_vgpr1 - ; CHECK-LABEL: name: cttz_zero_undef_i32_v + ; CHECK-LABEL: name: cttz_zero_undef_s32_v ; CHECK: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s32) + ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: cttz_zero_undef_s64_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1 + ; CHECK-LABEL: name: cttz_zero_undef_s64_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:sgpr(s32) = G_CTTZ_ZERO_UNDEF [[COPY]](s64) + ; CHECK: S_ENDPGM 0, implicit [[CTTZ_ZERO_UNDEF]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: cttz_zero_undef_s64_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1 + ; CHECK-LABEL: name: cttz_zero_undef_s64_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(eq), [[UV]](s32), [[C]] + ; CHECK: [[CTTZ_ZERO_UNDEF:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV1]](s32) + ; CHECK: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 32 + ; CHECK: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[CTTZ_ZERO_UNDEF]], [[C1]] + ; CHECK: [[CTTZ_ZERO_UNDEF1:%[0-9]+]]:vgpr(s32) = G_CTTZ_ZERO_UNDEF [[UV]](s32) + ; CHECK: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[ICMP]](s1), [[ADD]], [[CTTZ_ZERO_UNDEF1]] + ; CHECK: S_ENDPGM 0, implicit [[SELECT]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s32) = G_CTTZ_ZERO_UNDEF %0 + S_ENDPGM 0, implicit %1 ...