GlobalISel: narrow the source of G_CTLZ / G_CTTZ without re-merging the result.

Summary of the hunks below:
  * LegalizerHelper.cpp: in the TypeIdx == 1 narrowing paths for ctlz and cttz,
    the count, the "+ NarrowSize" add and the final select are now built in the
    destination type (DstTy / DstReg) instead of building a NarrowTy value and
    merging it with a zero constant into operand 0.
  * MipsLegalizerInfo.cpp: G_CTLZ and G_CTTZ additionally get maxScalar(0, s32).
  * ctlz.mir / cttz.mir: MIPS32 check lines updated for the new output.
  * LegalizerHelperTest.cpp: adds the NarrowScalarCTLZ and NarrowScalarCTTZ tests.

Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3978,23 +3978,24 @@
   if (TypeIdx != 1)
     return UnableToLegalize;
 
-  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
   unsigned NarrowSize = NarrowTy.getSizeInBits();
 
   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
     MachineIRBuilder &B = MIRBuilder;
-    auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1));
+    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
     auto C_0 = B.buildConstant(NarrowTy, 0);
     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                 UnmergeSrc.getReg(1), C_0);
-    auto LoCTLZ = B.buildCTLZ(NarrowTy, UnmergeSrc.getReg(0));
-    auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
-    auto HiIsZeroCTLZ = B.buildAdd(NarrowTy, LoCTLZ, C_NarrowSize);
-    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(1));
-    auto LoOut = B.buildSelect(NarrowTy, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
-
-    B.buildMerge(MI.getOperand(0), {LoOut.getReg(0), C_0.getReg(0)});
+    auto LoCTLZ = B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
+    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+    auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
+    auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
+    B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
 
     MI.eraseFromParent();
     return Legalized;
@@ -4009,24 +4010,24 @@
   if (TypeIdx != 1)
     return UnableToLegalize;
 
-  LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  LLT DstTy = MRI.getType(DstReg);
+  LLT SrcTy = MRI.getType(SrcReg);
   unsigned NarrowSize = NarrowTy.getSizeInBits();
 
   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
     MachineIRBuilder &B = MIRBuilder;
-    auto UnmergeSrc = B.buildUnmerge(NarrowTy, MI.getOperand(1));
+    auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
     auto C_0 = B.buildConstant(NarrowTy, 0);
     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
                                 UnmergeSrc.getReg(0), C_0);
-    auto HiCTTZ = B.buildCTTZ(NarrowTy, UnmergeSrc.getReg(1));
-    auto C_NarrowSize = B.buildConstant(NarrowTy, NarrowSize);
-    auto LoIsZeroCTTZ = B.buildAdd(NarrowTy, HiCTTZ, C_NarrowSize);
-    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(NarrowTy, UnmergeSrc.getReg(0));
-    auto LoOut = B.buildSelect(NarrowTy, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
-
-    B.buildMerge(MI.getOperand(0), {LoOut.getReg(0), C_0.getReg(0)});
-
+    auto HiCTTZ = B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
+    auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
+    auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
+    auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
+    B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
     MI.eraseFromParent();
     return Legalized;
   }
Index: llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
+++ llvm/lib/Target/Mips/MipsLegalizerInfo.cpp
@@ -204,12 +204,14 @@
 
   getActionDefinitionsBuilder(G_CTLZ)
       .legalFor({{s32, s32}})
+      .maxScalar(0, s32)
       .maxScalar(1, s32);
   getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
       .lowerFor({{s32, s32}});
 
   getActionDefinitionsBuilder(G_CTTZ)
       .lowerFor({{s32, s32}})
+      .maxScalar(0, s32)
       .maxScalar(1, s32);
   getActionDefinitionsBuilder(G_CTTZ_ZERO_UNDEF)
       .lowerFor({{s32, s32}, {s64, s64}});
Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
===================================================================
--- llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
+++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/ctlz.mir
@@ -42,8 +42,10 @@
     ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]]
     ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND]](s32), [[ADD]], [[CTLZ1]]
-    ; MIPS32: $v0 = COPY [[SELECT]](s32)
-    ; MIPS32: $v1 = COPY [[C]](s32)
+    ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32)
+    ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+    ; MIPS32: $v0 = COPY [[UV]](s32)
+    ; MIPS32: $v1 = COPY [[UV1]](s32)
     ; MIPS32: RetRA implicit $v0, implicit $v1
     %1:_(s32) = COPY $a0
     %2:_(s32) = COPY $a1
Index: llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
===================================================================
--- llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
+++ llvm/test/CodeGen/Mips/GlobalISel/legalizer/cttz.mir
@@ -57,8 +57,10 @@
     ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C3]]
     ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
-    ; MIPS32: $v0 = COPY [[SELECT]](s32)
-    ; MIPS32: $v1 = COPY [[C]](s32)
+    ; MIPS32: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C]](s32)
+    ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
+    ; MIPS32: $v0 = COPY [[UV]](s32)
+    ; MIPS32: $v1 = COPY [[UV1]](s32)
     ; MIPS32: RetRA implicit $v0, implicit $v1
     %1:_(s32) = COPY $a0
     %2:_(s32) = COPY $a1
@@ -140,23 +142,25 @@
     ; MIPS32: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; MIPS32: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
     ; MIPS32: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[AND2]](s32), [[ADD1]], [[SUB1]]
-    ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[SELECT]], [[C]]
+    ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[SELECT]](s32), [[C1]](s32)
+    ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV1]](s64)
+    ; MIPS32: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[C]]
     ; MIPS32: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[ADD3]](s32), [[C]]
-    ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[C1]], [[C1]]
+    ; MIPS32: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[C1]]
     ; MIPS32: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
     ; MIPS32: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; MIPS32: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[AND3]]
-    ; MIPS32: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
+    ; MIPS32: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ADD3]](s32), [[ADD5]](s32)
     ; MIPS32: [[XOR2:%[0-9]+]]:_(s32) = G_XOR [[COPY]], [[C1]]
     ; MIPS32: [[XOR3:%[0-9]+]]:_(s32) = G_XOR [[COPY1]], [[C1]]
     ; MIPS32: [[OR:%[0-9]+]]:_(s32) = G_OR [[XOR2]], [[XOR3]]
     ; MIPS32: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), [[OR]](s32), [[C1]]
     ; MIPS32: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP2]](s32)
     ; MIPS32: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV1]]
-    ; MIPS32: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
-    ; MIPS32: $v0 = COPY [[UV]](s32)
-    ; MIPS32: $v1 = COPY [[UV1]](s32)
+    ; MIPS32: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[AND4]](s32), [[MV]], [[MV2]]
+    ; MIPS32: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SELECT1]](s64)
+    ; MIPS32: $v0 = COPY [[UV2]](s32)
+    ; MIPS32: $v1 = COPY [[UV3]](s32)
     ; MIPS32: RetRA implicit $v0, implicit $v1
     %1:_(s32) = COPY $a0
     %2:_(s32) = COPY $a1
Index: llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
===================================================================
--- llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
+++ llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp
@@ -90,6 +90,76 @@
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
 }
 
+// CTLZ scalar narrowing
+TEST_F(GISelMITest, NarrowScalarCTLZ) {
+  setUp();
+  if (!TM)
+    return;
+
+  // Declare your legalization info
+  DefineLegalizerInfo(A, {
+    getActionDefinitionsBuilder(G_CTLZ).legalFor({{s32, s64}});
+  });
+  // Build Instr
+  auto CTLZ =
+      B.buildInstr(TargetOpcode::G_CTLZ, {LLT::scalar(32)}, {Copies[0]});
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+  // Perform Legalization
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*CTLZ, 1, LLT::scalar(32)));
+
+  auto CheckStr = R"(
+  CHECK: [[UNMERGE_LO:%[0-9]+]]:_(s32), [[UNMERGE_HI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %0:_(s64)
+  CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UNMERGE_HI]]:_(s32), [[ZERO]]:_
+  CHECK: [[CTLZ_LO:%[0-9]+]]:_(s32) = G_CTLZ [[UNMERGE_LO]]:_(s32)
+  CHECK: [[THIRTYTWO:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTLZ_LO]]:_, [[THIRTYTWO]]:_
+  CHECK: [[CTLZ_HI:%[0-9]+]]:_(s32) = G_CTLZ_ZERO_UNDEF [[UNMERGE_HI]]:_(s32)
+  CHECK: %{{[0-9]+}}:_(s32) = G_SELECT [[CMP]]:_(s1), [[ADD]]:_, [[CTLZ_HI]]:_
+  )";
+
+  // Check
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
+// CTTZ scalar narrowing
+TEST_F(GISelMITest, NarrowScalarCTTZ) {
+  setUp();
+  if (!TM)
+    return;
+
+  // Declare your legalization info
+  DefineLegalizerInfo(A, {
+    getActionDefinitionsBuilder(G_CTTZ).legalFor({{s32, s64}});
+  });
+  // Build Instr
+  auto CTTZ =
+      B.buildInstr(TargetOpcode::G_CTTZ, {LLT::scalar(32)}, {Copies[0]});
+  AInfo Info(MF->getSubtarget());
+  DummyGISelObserver Observer;
+  LegalizerHelper Helper(*MF, Info, Observer, B);
+  // Perform Legalization
+  EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized,
+            Helper.narrowScalar(*CTTZ, 1, LLT::scalar(32)));
+
+  auto CheckStr = R"(
+  CHECK: [[UNMERGE_LO:%[0-9]+]]:_(s32), [[UNMERGE_HI:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES %0:_(s64)
+  CHECK: [[ZERO:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  CHECK: [[CMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[UNMERGE_LO]]:_(s32), [[ZERO]]:_
+  CHECK: [[CTTZ_HI:%[0-9]+]]:_(s32) = G_CTTZ [[UNMERGE_HI]]:_(s32)
+  CHECK: [[THIRTYTWO:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[CTTZ_HI]]:_, [[THIRTYTWO]]:_
+  CHECK: [[CTTZ_LO:%[0-9]+]]:_(s32) = G_CTTZ_ZERO_UNDEF [[UNMERGE_LO]]:_(s32)
+  CHECK: %{{[0-9]+}}:_(s32) = G_SELECT [[CMP]]:_(s1), [[ADD]]:_, [[CTTZ_LO]]:_
+  )";
+
+  // Check
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF;
+}
+
 // CTTZ expansion in terms of CTPOP
 TEST_F(GISelMITest, LowerBitCountingCTTZ2) {
   setUp();