diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -241,6 +241,7 @@ LegalizeResult narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty); + LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty); LegalizeResult lowerBitcast(MachineInstr &MI); LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -977,8 +977,13 @@ case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTPOP: - if (TypeIdx != 0) - return UnableToLegalize; // TODO + if (TypeIdx == 1) + switch (MI.getOpcode()) { + case TargetOpcode::G_CTLZ: + return narrowScalarCTLZ(MI, TypeIdx, NarrowTy); + default: + return UnableToLegalize; + } Observer.changingInstr(MI); narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT); @@ -3850,6 +3855,37 @@ } LegalizerHelper::LegalizeResult +LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy) { + if (TypeIdx != 1) + return UnableToLegalize; + + LLT SrcTy = MRI.getType(MI.getOperand(1).getReg()); + unsigned NarrowSize = NarrowTy.getSizeInBits(); + + if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) { + MachineIRBuilder &B = MIRBuilder; + auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1)); + // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi) + auto C_0 = B.buildConstant(NarrowTy, 0); + auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1), + UnmergeSrc.getReg(1), C_0); + auto LoCTLZ = B.buildCTLZ(NarrowTy, UnmergeSrc.getReg(0)); + auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize); + auto HiIsZeroCTLZ = B.buildAdd(NarrowTy, LoCTLZ, C_NarrowSize); + auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(1)); + auto LoOut = B.buildSelect(NarrowTy, HiIsZero, HiIsZeroCTLZ, HiCTLZ); + + B.buildMerge(MI.getOperand(0), {LoOut.getReg(0), C_0.getReg(0)}); + + MI.eraseFromParent(); + return Legalized; + } + + return UnableToLegalize; +} + +LegalizerHelper::LegalizeResult LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) { unsigned Opc = MI.getOpcode(); auto &TII = *MI.getMF()->getSubtarget().getInstrInfo(); diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp --- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp +++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp @@ -202,6 +202,12 @@ .lowerFor({s32}) .maxScalar(0, s32); + getActionDefinitionsBuilder(G_CTLZ) + .legalFor({{s32, s32}}) + .maxScalar(1, s32); + getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) + .lowerFor({{s32, s32}}); + // FP instructions getActionDefinitionsBuilder(G_FCONSTANT) .legalFor({s32, s64}); diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp --- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp +++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp @@ -453,6 +453,7 @@ case G_BRINDIRECT: case G_VASTART: case G_BSWAP: + case G_CTLZ: OperandsMapping = &Mips::ValueMappings[Mips::GPRIdx]; break; case G_ADD: diff --git a/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/ctlz.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/instruction-select/ctlz.mir @@ -0,0 +1,24 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 +--- +name: ctlz_i32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; MIPS32-LABEL: name: ctlz_i32 + ; MIPS32: liveins: $a0 + ; MIPS32: [[COPY:%[0-9]+]]:gpr32 = COPY $a0 + ; MIPS32: [[CLZ:%[0-9]+]]:gpr32 = CLZ [[COPY]] + ; MIPS32: $v0 = COPY [[CLZ]] + ; MIPS32: RetRA implicit $v0 + %0:gprb(s32) = COPY $a0 + %1:gprb(s32) = G_CTLZ %0(s32) + $v0 = COPY %1(s32) + RetRA implicit $v0 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir @@ -0,0 +1,57 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 +--- +name: ctlz_i32 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; MIPS32-LABEL: name: ctlz_i32 + ; MIPS32: liveins: $a0 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; MIPS32: $v0 = COPY [[CTLZ]](s32) + ; MIPS32: RetRA implicit $v0 + %0:_(s32) = COPY $a0 + %1:_(s32) = G_CTLZ %0(s32) + $v0 = COPY %1(s32) + RetRA implicit $v0 + +... +--- +name: ctlz_i64 +alignment: 4 +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; MIPS32-LABEL: name: ctlz_i64 + ; MIPS32: liveins: $a0, $a1 + ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1 + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY1]](s32), [[C]] + ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[COPY]](s32) + ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32 + ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ]], [[C1]] + ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[COPY1]](s32) + ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32) + ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] + ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]] + ; MIPS32: $v0 = COPY [[SELECT]](s32) + ; MIPS32: $v1 = COPY [[C]](s32) + ; MIPS32: RetRA implicit $v0, implicit $v1 + %1:_(s32) = COPY $a0 + %2:_(s32) = COPY $a1 + %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32) + %3:_(s64) = G_CTLZ %0(s64) + %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3(s64) + $v0 = COPY %4(s32) + $v1 = COPY %5(s32) + RetRA implicit $v0, implicit $v1 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32 + +define i32 @ctlz_i32(i32 %a) { +; MIPS32-LABEL: ctlz_i32: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: clz $2, $4 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %0 = call i32 @llvm.ctlz.i32(i32 %a, i1 false) + ret i32 %0 +} +declare i32 @llvm.ctlz.i32(i32, i1 immarg) + + +define i64 @ctlz_i64(i64 %a) { +; MIPS32-LABEL: ctlz_i64: +; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: ori $3, $zero, 0 +; MIPS32-NEXT: sltiu $1, $5, 1 +; MIPS32-NEXT: clz $2, $4 +; MIPS32-NEXT: addiu $2, $2, 32 +; MIPS32-NEXT: clz $4, $5 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: movn $4, $2, $1 +; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: jr $ra +; MIPS32-NEXT: nop +entry: + %0 = call i64 @llvm.ctlz.i64(i64 %a, i1 false) + ret i64 %0 +} +declare i64 @llvm.ctlz.i64(i64, i1 immarg) diff --git a/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/ctlz.mir b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/ctlz.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/regbankselect/ctlz.mir @@ -0,0 +1,23 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 +--- +name: ctlz_i32 +alignment: 4 +legalized: true +tracksRegLiveness: true +body: | + bb.1.entry: + liveins: $a0 + + ; MIPS32-LABEL: name: ctlz_i32 + ; MIPS32: liveins: $a0 + ; MIPS32: [[COPY:%[0-9]+]]:gprb(s32) = COPY $a0 + ; MIPS32: [[CTLZ:%[0-9]+]]:gprb(s32) = G_CTLZ [[COPY]](s32) + ; MIPS32: $v0 = COPY [[CTLZ]](s32) + ; MIPS32: RetRA implicit $v0 + %0:_(s32) = COPY $a0 + %1:_(s32) = G_CTLZ %0(s32) + $v0 = COPY %1(s32) + RetRA implicit $v0 + +...