diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -242,6 +242,7 @@
   LegalizeResult narrowScalarExt(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
   LegalizeResult narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+  LegalizeResult narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
 
   LegalizeResult lowerBitcast(MachineInstr &MI);
   LegalizeResult lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -981,6 +981,8 @@
       switch (MI.getOpcode()) {
       case TargetOpcode::G_CTLZ:
         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
+      case TargetOpcode::G_CTTZ:
+        return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
       default:
         return UnableToLegalize;
       }
@@ -3886,6 +3888,52 @@
 }
 
 LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
+                                  LLT NarrowTy) {
+  // Narrow the source of a scalar G_CTTZ whose source is exactly twice as
+  // wide as NarrowTy by counting trailing zeros on the two halves separately.
+  // Only the source operand (type index 1) can be narrowed here.
+  if (TypeIdx != 1)
+    return UnableToLegalize;
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
+  unsigned NarrowSize = NarrowTy.getSizeInBits();
+
+  if (!SrcTy.isScalar() || !DstTy.isScalar() ||
+      SrcTy.getSizeInBits() != 2 * NarrowSize)
+    return UnableToLegalize;
+
+  MachineIRBuilder &B = MIRBuilder;
+  auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
+  // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
+  auto C_0 = B.buildConstant(NarrowTy, 0);
+  auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
+                              UnmergeSrc.getReg(0), C_0);
+  auto HiCTTZ = B.buildCTTZ(NarrowTy, UnmergeSrc.getReg(1));
+  auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
+  auto LoIsZeroCTTZ = B.buildAdd(NarrowTy, HiCTTZ, C_NarrowSize);
+  // The select below only uses this value when Lo != 0, so the zero-undef
+  // form is sufficient for the low half.
+  auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(0));
+  auto LoOut = B.buildSelect(NarrowTy, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
+
+  if (DstTy.getSizeInBits() == 2 * NarrowSize) {
+    // The count is the low half of the result; the high half is zero.
+    B.buildMerge(DstReg, {LoOut.getReg(0), C_0.getReg(0)});
+  } else {
+    // Two NarrowTy pieces do not cover a destination of any other width, so
+    // resize the NarrowTy count to the destination type instead.
+    B.buildZExtOrTrunc(DstReg, LoOut);
+  }
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
 LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
   unsigned Opc = MI.getOpcode();
   auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
diff --git a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
--- a/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -208,6 +208,12 @@
   getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
       .lowerFor({{s32, s32}});
 
+  getActionDefinitionsBuilder(G_CTTZ)
+      .lowerFor({{s32, s32}})
+      .maxScalar(1, s32);
+  getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
+      .lowerFor({{s32, s32}, {s64, s64}});
+
   // FP instructions
   getActionDefinitionsBuilder(G_FCONSTANT)
       .legalFor({s32, s64});
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
@@ -0,0 +1,177 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=legalizer -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32
+# Legalization of G_CTTZ and G_CTTZ_ZERO_UNDEF for s32 and s64 values on
+# MIPS32 (mipsel).
+---
+name:            cttz_i32
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0
+
+    ; MIPS32-LABEL: name: cttz_i32
+    ; MIPS32: liveins: $a0
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C]]
+    ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C]]
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+    ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+    ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[CTLZ]]
+    ; MIPS32: $v0 = COPY [[SUB]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %0:_(s32) = COPY $a0
+    %1:_(s32) = G_CTTZ %0(s32)
+    $v0 = COPY %1(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            cttz_i64
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1
+
+    ; MIPS32-LABEL: name: cttz_i64
+    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C]]
+    ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
+    ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C1]]
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+    ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+    ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ]]
+    ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C2]]
+    ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
+    ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C1]]
+    ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
+    ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
+    ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[CTLZ1]]
+    ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
+    ; MIPS32: $v0 = COPY [[SELECT]](s32)
+    ; MIPS32: $v1 = COPY [[C]](s32)
+    ; MIPS32: RetRA implicit $v0, implicit $v1
+    %1:_(s32) = COPY $a0
+    %2:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %3:_(s64) = G_CTTZ %0(s64)
+    %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3(s64)
+    $v0 = COPY %4(s32)
+    $v1 = COPY %5(s32)
+    RetRA implicit $v0, implicit $v1
+
+...
+---
+name:            ffs_i32_expansion
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0
+
+    ; MIPS32-LABEL: name: ffs_i32_expansion
+    ; MIPS32: liveins: $a0
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
+    ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+    ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+    ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
+    ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = nuw nsw G_ADD [[SUB]], [[C]]
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND1]](s32), [[C1]], [[ADD1]]
+    ; MIPS32: $v0 = COPY [[SELECT]](s32)
+    ; MIPS32: RetRA implicit $v0
+    %0:_(s32) = COPY $a0
+    %2:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_CONSTANT i32 0
+    %1:_(s32) = G_CTTZ_ZERO_UNDEF %0(s32)
+    %3:_(s32) = nuw nsw G_ADD %1, %2
+    %5:_(s1) = G_ICMP intpred(eq), %0(s32), %4
+    %6:_(s32) = G_SELECT %5(s1), %4, %3
+    $v0 = COPY %6(s32)
+    RetRA implicit $v0
+
+...
+---
+name:            ffs_i64_expansion
+alignment:       4
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $a0, $a1
+
+    ; MIPS32-LABEL: name: ffs_i64_expansion
+    ; MIPS32: liveins: $a0, $a1
+    ; MIPS32: [[COPY:%[0-9]+]]:_(s32) = COPY $a0
+    ; MIPS32: [[COPY1:%[0-9]+]]:_(s32) = COPY $a1
+    ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+    ; MIPS32: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+    ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C1]](s32), [[C1]](s32)
+    ; MIPS32: [[ICMP:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[COPY]](s32), [[C1]]
+    ; MIPS32: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1
+    ; MIPS32: [[XOR:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C2]]
+    ; MIPS32: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C2]]
+    ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[ADD]]
+    ; MIPS32: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; MIPS32: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
+    ; MIPS32: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ]]
+    ; MIPS32: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[SUB]], [[C3]]
+    ; MIPS32: [[XOR1:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C2]]
+    ; MIPS32: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[C2]]
+    ; MIPS32: [[AND1:%[0-9]+]]:_(s32) = G_AND [[XOR1]], [[ADD2]]
+    ; MIPS32: [[CTLZ1:%[0-9]+]]:_(s32) = G_CTLZ [[AND1]](s32)
+    ; MIPS32: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[CTLZ1]]
+    ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
+    ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
+    ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]]
+    ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]]
+    ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]]
+    ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
+    ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]]
+    ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
+    ; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
+    ; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
+    ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]]
+    ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]]
+    ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
+    ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
+    ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]]
+    ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
+    ; MIPS32: $v0 = COPY [[UV]](s32)
+    ; MIPS32: $v1 = COPY [[UV1]](s32)
+    ; MIPS32: RetRA implicit $v0, implicit $v1
+    %1:_(s32) = COPY $a0
+    %2:_(s32) = COPY $a1
+    %0:_(s64) = G_MERGE_VALUES %1(s32), %2(s32)
+    %4:_(s64) = G_CONSTANT i64 1
+    %6:_(s64) = G_CONSTANT i64 0
+    %3:_(s64) = G_CTTZ_ZERO_UNDEF %0(s64)
+    %5:_(s64) = nuw nsw G_ADD %3, %4
+    %7:_(s1) = G_ICMP intpred(eq), %0(s64), %6
+    %8:_(s64) = G_SELECT %7(s1), %6, %5
+    %9:_(s32), %10:_(s32) = G_UNMERGE_VALUES %8(s64)
+    $v0 = COPY %9(s32)
+    $v1 = COPY %10(s32)
+    RetRA implicit $v0, implicit $v1
+
+...
diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 -mtriple=mipsel-linux-gnu -global-isel -verify-machineinstrs %s -o -| FileCheck %s -check-prefixes=MIPS32
+; Checks -O0 GlobalISel codegen on mipsel for llvm.cttz on i32 and i64.
+define i32 @cttz_i32(i32 %a) {
+; MIPS32-LABEL: cttz_i32:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    not $1, $4
+; MIPS32-NEXT:    addiu $2, $4, -1
+; MIPS32-NEXT:    and $1, $1, $2
+; MIPS32-NEXT:    ori $2, $zero, 32
+; MIPS32-NEXT:    clz $1, $1
+; MIPS32-NEXT:    subu $2, $2, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %a, i1 false)
+  ret i32 %0
+}
+declare i32 @llvm.cttz.i32(i32, i1 immarg)
+
+define i64 @cttz_i64(i64 %a) {
+; MIPS32-LABEL: cttz_i64:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $3, $zero, 0
+; MIPS32-NEXT:    sltiu $1, $4, 1
+; MIPS32-NEXT:    not $2, $5
+; MIPS32-NEXT:    addiu $5, $5, -1
+; MIPS32-NEXT:    and $2, $2, $5
+; MIPS32-NEXT:    ori $5, $zero, 32
+; MIPS32-NEXT:    clz $2, $2
+; MIPS32-NEXT:    subu $2, $5, $2
+; MIPS32-NEXT:    addiu $2, $2, 32
+; MIPS32-NEXT:    not $6, $4
+; MIPS32-NEXT:    addiu $4, $4, -1
+; MIPS32-NEXT:    and $4, $6, $4
+; MIPS32-NEXT:    clz $4, $4
+; MIPS32-NEXT:    subu $4, $5, $4
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    movn $4, $2, $1
+; MIPS32-NEXT:    move $2, $4
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %0 = call i64 @llvm.cttz.i64(i64 %a, i1 false)
+  ret i64 %0
+}
+declare i64 @llvm.cttz.i64(i64, i1 immarg)
+
+; ffs-style expansion on i32: a == 0 ? 0 : cttz(a, i1 true) + 1.
+define i32 @ffs_i32_expansion(i32 %a) {
+; MIPS32-LABEL: ffs_i32_expansion:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $1, $zero, 0
+; MIPS32-NEXT:    not $2, $4
+; MIPS32-NEXT:    addiu $3, $4, -1
+; MIPS32-NEXT:    and $2, $2, $3
+; MIPS32-NEXT:    ori $3, $zero, 32
+; MIPS32-NEXT:    clz $2, $2
+; MIPS32-NEXT:    subu $2, $3, $2
+; MIPS32-NEXT:    addiu $2, $2, 1
+; MIPS32-NEXT:    sltiu $3, $4, 1
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $2, $1, $3
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %0 = call i32 @llvm.cttz.i32(i32 %a, i1 true)
+  %1 = add nuw nsw i32 %0, 1
+  %iszero = icmp eq i32 %a, 0
+  %ffs = select i1 %iszero, i32 0, i32 %1
+  ret i32 %ffs
+}
+; ffs-style expansion on i64: a == 0 ? 0 : cttz(a, i1 true) + 1.
+define i64 @ffs_i64_expansion(i64 %a) {
+; MIPS32-LABEL: ffs_i64_expansion:
+; MIPS32:       # %bb.0: # %entry
+; MIPS32-NEXT:    ori $1, $zero, 1
+; MIPS32-NEXT:    ori $2, $zero, 0
+; MIPS32-NEXT:    sltiu $3, $4, 1
+; MIPS32-NEXT:    not $6, $5
+; MIPS32-NEXT:    addiu $7, $5, -1
+; MIPS32-NEXT:    and $6, $6, $7
+; MIPS32-NEXT:    ori $7, $zero, 32
+; MIPS32-NEXT:    clz $6, $6
+; MIPS32-NEXT:    subu $6, $7, $6
+; MIPS32-NEXT:    addiu $6, $6, 32
+; MIPS32-NEXT:    not $8, $4
+; MIPS32-NEXT:    addiu $9, $4, -1
+; MIPS32-NEXT:    and $8, $8, $9
+; MIPS32-NEXT:    clz $8, $8
+; MIPS32-NEXT:    subu $7, $7, $8
+; MIPS32-NEXT:    andi $3, $3, 1
+; MIPS32-NEXT:    movn $7, $6, $3
+; MIPS32-NEXT:    addiu $3, $7, 1
+; MIPS32-NEXT:    sltu $1, $3, $1
+; MIPS32-NEXT:    addiu $6, $2, 0
+; MIPS32-NEXT:    andi $1, $1, 1
+; MIPS32-NEXT:    addu $1, $6, $1
+; MIPS32-NEXT:    xori $4, $4, 0
+; MIPS32-NEXT:    xori $5, $5, 0
+; MIPS32-NEXT:    or $4, $4, $5
+; MIPS32-NEXT:    sltiu $4, $4, 1
+; MIPS32-NEXT:    andi $4, $4, 1
+; MIPS32-NEXT:    movn $3, $2, $4
+; MIPS32-NEXT:    movn $1, $2, $4
+; MIPS32-NEXT:    move $2, $3
+; MIPS32-NEXT:    move $3, $1
+; MIPS32-NEXT:    jr $ra
+; MIPS32-NEXT:    nop
+entry:
+  %0 = call i64 @llvm.cttz.i64(i64 %a, i1 true)
+  %1 = add nuw nsw i64 %0, 1
+  %iszero = icmp eq i64 %a, 0
+  %ffs = select i1 %iszero, i64 0, i64 %1
+  ret i64 %ffs
+}