diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -46,7 +46,8 @@
       : Builder(B), MRI(MRI), LI(LI) {}
 
   bool tryCombineAnyExt(MachineInstr &MI,
-                        SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                        SmallVectorImpl<MachineInstr *> &DeadInsts,
+                        SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
 
     Builder.setInstr(MI);
@@ -58,6 +59,7 @@
     if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
       LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
       Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
+      UpdatedDefs.push_back(DstReg);
       markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
       return true;
     }
@@ -70,6 +72,7 @@
                                  m_GSExt(m_Reg(ExtSrc)),
                                  m_GZExt(m_Reg(ExtSrc)))))) {
       Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+      UpdatedDefs.push_back(DstReg);
       markInstAndDefDead(MI, *ExtMI, DeadInsts);
       return true;
     }
@@ -83,15 +86,17 @@
         auto &CstVal = SrcMI->getOperand(1);
         Builder.buildConstant(
             DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
+        UpdatedDefs.push_back(DstReg);
         markInstAndDefDead(MI, *SrcMI, DeadInsts);
         return true;
       }
     }
-    return tryFoldImplicitDef(MI, DeadInsts);
+    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
   }
 
   bool tryCombineZExt(MachineInstr &MI,
-                      SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                      SmallVectorImpl<MachineInstr *> &DeadInsts,
+                      SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
 
     Builder.setInstr(MI);
@@ -124,15 +129,17 @@
         auto &CstVal = SrcMI->getOperand(1);
         Builder.buildConstant(
             DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
+        UpdatedDefs.push_back(DstReg);
         markInstAndDefDead(MI, *SrcMI, DeadInsts);
         return true;
       }
     }
-    return tryFoldImplicitDef(MI, DeadInsts);
+    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
   }
 
   bool tryCombineSExt(MachineInstr &MI,
-                      SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                      SmallVectorImpl<MachineInstr *> &DeadInsts,
+                      SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_SEXT);
 
     Builder.setInstr(MI);
@@ -154,11 +161,12 @@
       markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
       return true;
     }
-    return tryFoldImplicitDef(MI, DeadInsts);
+    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
   }
 
   bool tryCombineTrunc(MachineInstr &MI,
-                       SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                       SmallVectorImpl<MachineInstr *> &DeadInsts,
+                       SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
 
     Builder.setInstr(MI);
@@ -174,6 +182,7 @@
         auto &CstVal = SrcMI->getOperand(1);
         Builder.buildConstant(
             DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
+        UpdatedDefs.push_back(DstReg);
         markInstAndDefDead(MI, *SrcMI, DeadInsts);
         return true;
       }
@@ -182,10 +191,10 @@
     return false;
   }
 
-  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
   bool tryFoldImplicitDef(MachineInstr &MI,
-                          SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                          SmallVectorImpl<MachineInstr *> &DeadInsts,
+                          SmallVectorImpl<Register> &UpdatedDefs) {
     unsigned Opcode = MI.getOpcode();
     assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
            Opcode == TargetOpcode::G_SEXT);
 
@@ -202,6 +211,7 @@
         return false;
       LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
       Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
+      UpdatedDefs.push_back(DstReg);
     } else {
       // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
      // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
@@ -209,6 +219,7 @@
         return false;
       LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
       Builder.buildConstant(DstReg, 0);
+      UpdatedDefs.push_back(DstReg);
     }
 
     markInstAndDefDead(MI, *DefMI, DeadInsts);
@@ -269,7 +280,8 @@
   }
 
   bool tryCombineMerges(MachineInstr &MI,
-                        SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                        SmallVectorImpl<MachineInstr *> &DeadInsts,
+                        SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
 
     unsigned NumDefs = MI.getNumOperands() - 1;
@@ -319,8 +331,8 @@
           SmallVector<Register, 2> TmpRegs;
           // This is a vector that is being scalarized and casted. Extract to
           // the element type, and do the conversion on the scalars.
-          LLT MergeEltTy
-            = MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
+          LLT MergeEltTy =
+              MRI.getType(MergeI->getOperand(0).getReg()).getElementType();
           for (unsigned j = 0; j < NumMergeRegs; ++j)
             TmpRegs.push_back(MRI.createGenericVirtualRegister(MergeEltTy));
 
@@ -331,6 +343,7 @@
         } else {
           Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
         }
+        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
       }
 
     } else if (NumMergeRegs > NumDefs) {
@@ -352,7 +365,9 @@
              ++j, ++Idx)
           Regs.push_back(MergeI->getOperand(Idx).getReg());
 
-        Builder.buildMerge(MI.getOperand(DefIdx).getReg(), Regs);
+        Register DefReg = MI.getOperand(DefIdx).getReg();
+        Builder.buildMerge(DefReg, Regs);
+        UpdatedDefs.push_back(DefReg);
       }
 
     } else {
@@ -366,8 +381,9 @@
 
       for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
         Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();
-        Builder.buildInstr(ConvertOp, {MI.getOperand(Idx).getReg()},
-                           {MergeSrc});
+        Register DefReg = MI.getOperand(Idx).getReg();
+        Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
+        UpdatedDefs.push_back(DefReg);
       }
 
       markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -378,9 +394,11 @@
              "Bitcast and the other kinds of conversions should "
              "have happened earlier");
 
-      for (unsigned Idx = 0; Idx < NumDefs; ++Idx)
-        MRI.replaceRegWith(MI.getOperand(Idx).getReg(),
-                           MergeI->getOperand(Idx + 1).getReg());
+      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
+        Register NewDef = MergeI->getOperand(Idx + 1).getReg();
+        MRI.replaceRegWith(MI.getOperand(Idx).getReg(), NewDef);
+        UpdatedDefs.push_back(NewDef);
+      }
     }
 
     markInstAndDefDead(MI, *MergeI, DeadInsts);
@@ -399,7 +417,8 @@
   }
 
   bool tryCombineExtract(MachineInstr &MI,
-                         SmallVectorImpl<MachineInstr *> &DeadInsts) {
+                         SmallVectorImpl<MachineInstr *> &DeadInsts,
+                         SmallVectorImpl<Register> &UpdatedDefs) {
     assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);
 
     // Try to use the source registers from a G_MERGE_VALUES
@@ -414,13 +433,14 @@
     // for N >= %2.getSizeInBits() / 2
     //    %3 = G_EXTRACT %1, (N - %0.getSizeInBits()
 
-    unsigned Src = lookThroughCopyInstrs(MI.getOperand(1).getReg());
-    MachineInstr *MergeI = MRI.getVRegDef(Src);
+    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
+    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
     if (!MergeI || !isMergeLikeOpcode(MergeI->getOpcode()))
       return false;
 
-    LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
-    LLT SrcTy = MRI.getType(Src);
+    Register DstReg = MI.getOperand(0).getReg();
+    LLT DstTy = MRI.getType(DstReg);
+    LLT SrcTy = MRI.getType(SrcReg);
 
     // TODO: Do we need to check if the resulting extract is supported?
     unsigned ExtractDstSize = DstTy.getSizeInBits();
@@ -438,10 +458,9 @@
     // TODO: We could modify MI in place in most cases.
     Builder.setInstr(MI);
-    Builder.buildExtract(
-        MI.getOperand(0).getReg(),
-        MergeI->getOperand(MergeSrcIdx + 1).getReg(),
-        Offset - MergeSrcIdx * MergeSrcSize);
+    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
+                         Offset - MergeSrcIdx * MergeSrcSize);
+    UpdatedDefs.push_back(DstReg);
     markInstAndDefDead(MI, *MergeI, DeadInsts);
     return true;
   }
@@ -458,33 +477,79 @@
     // etc, process the dead instructions now if any.
     if (!DeadInsts.empty())
       deleteMarkedDeadInsts(DeadInsts, WrapperObserver);
+
+    // Put here every vreg that was redefined in such a way that it's at least
+    // possible that one (or more) of its users (immediate or COPY-separated)
+    // could become artifact combinable with the new definition (or the
+    // instruction reachable from it through a chain of copies if any).
+    SmallVector<Register, 4> UpdatedDefs;
+    bool Changed = false;
     switch (MI.getOpcode()) {
     default:
       return false;
     case TargetOpcode::G_ANYEXT:
-      return tryCombineAnyExt(MI, DeadInsts);
+      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
+      break;
     case TargetOpcode::G_ZEXT:
-      return tryCombineZExt(MI, DeadInsts);
+      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs);
+      break;
     case TargetOpcode::G_SEXT:
-      return tryCombineSExt(MI, DeadInsts);
+      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
+      break;
     case TargetOpcode::G_UNMERGE_VALUES:
-      return tryCombineMerges(MI, DeadInsts);
+      Changed = tryCombineMerges(MI, DeadInsts, UpdatedDefs);
+      break;
     case TargetOpcode::G_EXTRACT:
-      return tryCombineExtract(MI, DeadInsts);
-    case TargetOpcode::G_TRUNC: {
-      if (tryCombineTrunc(MI, DeadInsts))
-        return true;
-
-      bool Changed = false;
-      for (auto &Use : MRI.use_instructions(MI.getOperand(0).getReg()))
-        Changed |= tryCombineInstruction(Use, DeadInsts, WrapperObserver);
-      return Changed;
+      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
+      break;
+    case TargetOpcode::G_TRUNC:
+      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs);
+      if (!Changed) {
+        // Try to combine truncates away even if they are legal. As all artifact
+        // combines at the moment look only "up" the def-use chains, we achieve
+        // that by throwing truncates' users (with look through copies) into the
+        // ArtifactList again.
+        UpdatedDefs.push_back(MI.getOperand(0).getReg());
+      }
+      break;
     }
+    // If the main loop through the ArtifactList found at least one combinable
+    // pair of artifacts, not only combine it away (as done above), but also
+    // follow the def-use chain from there to combine everything that can be
+    // combined within this def-use chain of artifacts.
+    while (!UpdatedDefs.empty()) {
+      Register NewDef = UpdatedDefs.pop_back_val();
+      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
+      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
+        switch (Use.getOpcode()) {
+        // Keep this list in sync with the list of all artifact combines.
+        case TargetOpcode::G_ANYEXT:
+        case TargetOpcode::G_ZEXT:
+        case TargetOpcode::G_SEXT:
+        case TargetOpcode::G_UNMERGE_VALUES:
+        case TargetOpcode::G_EXTRACT:
+        case TargetOpcode::G_TRUNC:
+          // Adding Use to ArtifactList.
+          WrapperObserver.changedInstr(Use);
+          break;
+        case TargetOpcode::COPY: {
+          Register Copy = Use.getOperand(0).getReg();
+          if (Copy.isVirtual())
+            UpdatedDefs.push_back(Copy);
+          break;
+        }
+        default:
+          // If we do not have an artifact combine for the opcode, there is no
+          // point in adding it to the ArtifactList as nothing interesting will
+          // be done to it anyway.
+          break;
+        }
+      }
     }
+    return Changed;
   }
 
 private:
-
   static unsigned getArtifactSrcReg(const MachineInstr &MI) {
     switch (MI.getOpcode()) {
     case TargetOpcode::COPY:
diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
--- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -212,6 +212,11 @@
       // ArtifactCombiner to combine away them.
       if (isArtifact(MI)) {
         LLVM_DEBUG(dbgs() << ".. Not legalized, moving to artifacts retry\n");
+        assert(NumArtifacts == 0 &&
+               "Artifacts are only expected in instruction list starting the "
+               "second iteration, but each iteration starting second must "
+               "start with an empty artifacts list");
+        (void)NumArtifacts;
         RetryList.push_back(&MI);
         continue;
       }
@@ -224,7 +229,7 @@
       // Try to combine the instructions in RetryList again if there
       // are new artifacts. If not, stop legalizing.
       if (!RetryList.empty()) {
-        if (ArtifactList.size() > NumArtifacts) {
+        if (!ArtifactList.empty()) {
           while (!RetryList.empty())
             ArtifactList.insert(RetryList.pop_back_val());
         } else {
diff --git a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
--- a/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
+++ b/llvm/lib/Target/Mips/MipsRegisterBankInfo.cpp
@@ -651,8 +651,9 @@
 static void
 combineAwayG_UNMERGE_VALUES(LegalizationArtifactCombiner &ArtCombiner,
                             MachineInstr &MI) {
+  SmallVector<Register, 4> UpdatedDefs;
   SmallVector<MachineInstr *, 2> DeadInstrs;
-  ArtCombiner.tryCombineMerges(MI, DeadInstrs);
+  ArtCombiner.tryCombineMerges(MI, DeadInstrs, UpdatedDefs);
   for (MachineInstr *DeadMI : DeadInstrs)
     DeadMI->eraseFromParent();
 }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-fallback.ll
@@ -167,7 +167,7 @@
   ret void
 }
 
-; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %5:fpr32(s32) = G_EXTRACT %{{[0-9]+}}:fpr(s128), 64 (in function: nonpow2_store_narrowing)
+; FALLBACK-WITH-REPORT-ERR: remark: <unknown>:0:0: cannot select: %14:gpr64(s64), %15:gpr(s1) = G_UADDE %17:gpr, %17:gpr, %13:gpr (in function: nonpow2_store_narrowing)
 ; FALLBACK-WITH-REPORT-ERR: warning: Instruction selection used fallback path for nonpow2_store_narrowing
 ; FALLBACK-WITH-REPORT-OUT-LABEL: nonpow2_store_narrowing:
 define void @nonpow2_store_narrowing(i96* %c) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-merge-values.mir
@@ -15,9 +15,7 @@
     ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY1]]
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR]](s32)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s8) = COPY [[TRUNC]](s8)
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[COPY2]](s8)
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[OR]](s32)
     ; CHECK: $x0 = COPY [[ANYEXT]](s64)
     %0:_(s64) = G_CONSTANT i64 0
     %1:_(s4) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-combiner-zext-trunc-crash.mir
@@ -22,18 +22,19 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[ICMP]](s32)
     ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[DEF1]]
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32)
     ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -33
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C2]]
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
     ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -65
-    ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY1]], [[C3]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
-    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY2]], [[C3]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+    ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C]]
     ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 26
     ; CHECK: [[ICMP1:%[0-9]+]]:_(s32) = G_ICMP intpred(ult), [[AND2]](s32), [[C4]]
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
-    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY3]], [[COPY4]]
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ICMP1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[OR]](s32)
+    ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY4]], [[COPY5]]
     ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[OR1]](s32)
     ; CHECK: G_BRCOND [[TRUNC]](s1), %bb.2
   ; CHECK: bb.2:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/retry-artifact-combine.mir
@@ -8,14 +8,12 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
     ; CHECK: [[FCMP:%[0-9]+]]:_(s32) = G_FCMP floatpred(ogt), [[COPY]](s32), [[COPY1]]
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[COPY3]]
-    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[AND]](s32)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C]]
-    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
-    ; CHECK: $w0 = COPY [[COPY5]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FCMP]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C]]
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]]
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[AND1]](s32)
+    ; CHECK: $w0 = COPY [[COPY3]](s32)
     %0:_(s32) = COPY $w0
     %1:_(s32) = COPY $w1
     %2:_(s1) = G_FCMP floatpred(ogt), %0(s32), %1
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-sext.mir
@@ -36,15 +36,14 @@
    ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[UV]](s32)
    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[UV1]](s32)
-    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[TRUNC]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[TRUNC1]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[TRUNC2]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[C]](s64)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[TRUNC3]](s32)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 48
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT]], [[COPY1]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[ANYEXT1]], [[C]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[SHL]], [[COPY2]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[SHL1]], [[COPY3]](s32)
    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ASHR]](s64), [[ASHR1]](s64)
    ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>)
    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
@@ -64,27 +63,28 @@
    ; CHECK-LABEL: name: test_sext_trunc_v2s32_to_v2s8_to_v2s16
    ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1
    ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[COPY]](<2 x s32>)
-    ; CHECK: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8
    ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[TRUNC]](<2 x s16>)
-    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
-    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C1]](s32)
-    ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
-    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[ZEXT]](s32)
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32)
-    ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
-    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[ZEXT1]](s32)
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32)
-    ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-    ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC1]](s16)
-    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT2]](s32)
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32)
-    ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16)
-    ; CHECK: [[SEXT1:%[0-9]+]]:_(s32) = G_SEXT [[TRUNC2]](s16)
-    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[SEXT1]], [[ZEXT3]](s32)
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32)
-    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC3]](s16), [[TRUNC4]](s16)
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+    ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32)
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY2]], [[COPY1]](s32)
+    ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
+    ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C1]](s32)
+    ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
+    ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY5]], [[C]](s32)
+    ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SHL2]], [[C]](s32)
+    ; CHECK: [[ASHR1:%[0-9]+]]:_(s32) = G_ASHR [[ASHR]], [[COPY4]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR1]](s32)
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C1]](s32)
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SHL1]](s32)
+    ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY7]], [[C]](s32)
+    ; CHECK: [[ASHR2:%[0-9]+]]:_(s32) = G_ASHR [[SHL3]], [[C]](s32)
+    ; CHECK: [[ASHR3:%[0-9]+]]:_(s32) = G_ASHR [[ASHR2]], [[COPY6]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR3]](s32)
+    ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[TRUNC1]](s16), [[TRUNC2]](s16)
    ; CHECK: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>)
    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
    %1:_(<2 x s8>) = G_TRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fcopysign.mir
@@ -608,16 +608,15 @@
    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
    ; SI: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]]
    ; SI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]]
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
    ; SI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
    ; SI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
    ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
    ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
+    ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
    ; SI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
    ; SI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
    ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
@@ -632,16 +631,15 @@
    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
    ; VI: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]]
    ; VI: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]]
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
    ; VI: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
    ; VI: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
    ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
    ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
+    ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
    ; VI: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
    ; VI: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
    ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
@@ -656,16 +654,15 @@
    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
    ; GFX9: [[AND:%[0-9]+]]:_(s64) = G_AND [[UV]], [[C1]]
    ; GFX9: [[AND1:%[0-9]+]]:_(s64) = G_AND [[UV1]], [[C1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
    ; GFX9: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[UV2]](s32)
    ; GFX9: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT [[UV3]](s32)
    ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[ZEXT]](s64), [[ZEXT1]](s64)
    ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s64>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[TRUNC]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[TRUNC1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV4]], [[COPY2]](s32)
+    ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[C2]](s32)
    ; GFX9: [[AND2:%[0-9]+]]:_(s64) = G_AND [[SHL]], [[C]]
    ; GFX9: [[AND3:%[0-9]+]]:_(s64) = G_AND [[SHL1]], [[C]]
    ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[AND]], [[AND2]]
@@ -693,15 +690,14 @@
    ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
    ; SI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
    ; SI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]]
-    ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32)
-    ; SI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32)
+    ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; SI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
+    ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
    ; SI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; SI: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]]
+    ; SI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
+    ; SI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
    ; SI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
    ; SI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
    ; VI-LABEL: name: test_copysign_v2s32_v2s64
@@ -712,15 +708,14 @@
    ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
    ; VI: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
    ; VI: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]]
-    ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32)
-    ; VI: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32)
+    ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; VI: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
+    ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
    ; VI: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; VI: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]]
+    ; VI: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
+    ; VI: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
    ; VI: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
    ; VI: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
    ; GFX9-LABEL: name: test_copysign_v2s32_v2s64
@@ -731,15 +726,14 @@
    ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2147483647
    ; GFX9: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32)
    ; GFX9: [[AND:%[0-9]+]]:_(<2 x s32>) = G_AND [[COPY]], [[BUILD_VECTOR1]]
-    ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
    ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>)
-    ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32)
-    ; GFX9: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[C2]](s64)
-    ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC1]](s32)
+    ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+    ; GFX9: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C2]](s32)
+    ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[COPY2]](s32)
+    ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[C2]](s32)
    ; GFX9: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[LSHR]](s64), [[LSHR1]](s64)
-    ; GFX9: [[TRUNC2:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
-    ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC2]], [[BUILD_VECTOR]]
+    ; GFX9: [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s64>)
+    ; GFX9: [[AND1:%[0-9]+]]:_(<2 x s32>) = G_AND [[TRUNC]], [[BUILD_VECTOR]]
    ; GFX9: [[OR:%[0-9]+]]:_(<2 x s32>) = G_OR [[AND]], [[AND1]]
    ; GFX9: $vgpr0_vgpr1 = COPY [[OR]](<2 x s32>)
    %0:_(<2 x s32>) = COPY $vgpr0_vgpr1
diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
--- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
+++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp
@@ -26,6 +26,29 @@
       << "unable to legalize instruction: " << MISStream.str();
 }
 
+DefineLegalizerInfo(ALegalizer, {
+  auto p0 = LLT::pointer(0, 64);
+  auto v2s8 = LLT::vector(2, 8);
+  auto v2s16 = LLT::vector(2, 16);
+  getActionDefinitionsBuilder(G_LOAD)
+      .legalForTypesWithMemDesc({{s16, p0, 8, 8}})
+      .scalarize(0)
+      .clampScalar(0, s16, s16);
+  getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}});
+  getActionDefinitionsBuilder(G_CONSTANT).legalFor({s32, s64});
+  getActionDefinitionsBuilder(G_BUILD_VECTOR)
+      .legalFor({{v2s16, s16}})
+      .clampScalar(1, s16, s16);
+  getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).legalFor({{v2s8, s16}});
+  getActionDefinitionsBuilder(G_ANYEXT).legalFor({{s32, s16}});
+  getActionDefinitionsBuilder(G_ZEXT).legalFor({{s32, s16}});
+  getActionDefinitionsBuilder(G_SEXT).legalFor({{s32, s16}});
+  getActionDefinitionsBuilder(G_AND).legalFor({s32});
+  getActionDefinitionsBuilder(G_SEXT_INREG).lower();
+  getActionDefinitionsBuilder(G_ASHR).legalFor({{s32, s32}});
+  getActionDefinitionsBuilder(G_SHL).legalFor({{s32, s32}});
+});
+
 TEST_F(GISelMITest, BasicLegalizerTest) {
   StringRef MIRString = R"(
     %vptr:_(p0) = COPY $x4
@@ -36,23 +59,6 @@
   if (!TM)
     return;
 
-  DefineLegalizerInfo(ALegalizer, {
-    auto p0 = LLT::pointer(0, 64);
-    auto v2s8 = LLT::vector(2, 8);
-    auto v2s16 = LLT::vector(2, 16);
-    getActionDefinitionsBuilder(G_LOAD)
-        .legalForTypesWithMemDesc({{s16, p0, 8, 8}})
-        .scalarize(0)
-        .clampScalar(0, s16, s16);
-    getActionDefinitionsBuilder(G_PTR_ADD).legalFor({{p0, s64}});
-    getActionDefinitionsBuilder(G_CONSTANT).legalFor({s64});
-    getActionDefinitionsBuilder(G_BUILD_VECTOR)
-        .legalFor({{v2s16, s16}})
-        .clampScalar(1, s16, s16);
-    getActionDefinitionsBuilder(G_BUILD_VECTOR_TRUNC).legalFor({{v2s8, s16}});
-    getActionDefinitionsBuilder(G_ANYEXT).legalFor({{s32, s16}});
-  });
-
   ALegalizerInfo LI(MF->getSubtarget());
 
   Legalizer::MFResult Result =
@@ -76,4 +82,140 @@
   EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
 }
 
+// Making sure the legalization finishes successfully w/o failure to combine
+// away all the legalization artifacts regardless of the order of their
+// creation.
+TEST_F(GISelMITest, UnorderedArtifactCombiningTest) {
+  StringRef MIRString = R"(
+    %vptr:_(p0) = COPY $x4
+    %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1)
+    %v0:_(s8), %v1:_(s8) = G_UNMERGE_VALUES %v:_(<2 x s8>)
+    %v0_ext:_(s16) = G_ANYEXT %v0:_(s8)
+    $h4 = COPY %v0_ext:_(s16)
+)";
+  setUp(MIRString.rtrim(' '));
+  if (!TM)
+    return;
+
+  ALegalizerInfo LI(MF->getSubtarget());
+
+  // The events here unfold as follows:
+  // 1. First, the function is scanned pre-forming the worklist of artifacts:
+  //
+  //    UNMERGE (1): pushed into the worklist first, will be processed last.
+  //       |
+  //    ANYEXT (2)
+  //
+  // 2. Second, the load is scalarized, and then its destination is widened,
+  //    forming the following chain of legalization artifacts:
+  //
+  //    TRUNC (4): created last, will be processed first.
+  //       |
+  //    BUILD_VECTOR (3)
+  //       |
+  //    UNMERGE (1): pushed into the worklist first, will be processed last.
+  //       |
+  //    ANYEXT (2)
+  //
+  // 3. Third, the artifacts are attempted to be combined in pairs, looking
+  //    through the def-use chain from the roots towards the leafs, visiting
+  //    the roots in order they happen to be in the worklist:
+  //    (4) - (trunc): can not be combined;
+  //    (3) - (build_vector (trunc)): can not be combined;
+  //    (2) - (anyext (unmerge)): can not be combined;
+  //    (1) - (unmerge (build_vector)): combined and eliminated;
+  //
+  //    leaving the function in the following state:
+  //
+  //    TRUNC (1): moved to non-artifact instructions worklist first.
+  //       |
+  //    ANYEXT (2): also moved to non-artifact instructions worklist.
+  //
+  //    Every other instruction is successfully legalized in full.
+  //    If combining (unmerge (build_vector)) does not re-insert every
+  //    artifact that had its def-use chain modified (shortened) into the
+  //    artifact worklist (here it's just ANYEXT), the process moves on onto
+  //    the next outer loop iteration of the top-level legalization algorithm
+  //    here, w/o performing all the artifact combines possible. Let's
+  //    consider this scenario first:
+  // 4.A. Neither TRUNC, nor ANYEXT can be legalized in isolation, both of
+  //      them get moved to the retry worklist, but no additional artifacts
+  //      were created in the process, thus algorithm concludes no progress
+  //      could be made, and fails.
+  // 4.B. If, however, combining (unmerge (build_vector)) had re-inserted
+  //      ANYEXT into the worklist (as ANYEXT's source changes, not by value,
+  //      but by implementation), (anyext (trunc)) combine happens next, which
+  //      fully eliminates all the artifacts and legalization succeeds.
+  //
+  // We're looking into making sure that (4.B) happens here, not (4.A). Note
+  // that in that case the first scan through the artifacts worklist, while
+  // not being done in any guaranteed order, only needs to find the innermost
+  // pair(s) of artifacts that could be immediately combined out. After that
+  // the process follows def-use chains, making them shorter at each step,
+  // thus combining everything that can be combined in O(n) time.
+  Legalizer::MFResult Result =
+      Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
+
+  EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
+  EXPECT_TRUE(Result.Changed);
+
+  StringRef CheckString = R"(
+    CHECK: %vptr:_(p0) = COPY $x4
+    CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1)
+    CHECK: %v0_ext:_(s16) = COPY [[LOAD_0]]:_(s16)
+    CHECK-NEXT: $h4 = COPY %v0_ext:_(s16)
+)";
+
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
+}
+
+TEST_F(GISelMITest, UnorderedArtifactCombiningManyCopiesTest) {
+  StringRef MIRString = R"(
+    %vptr:_(p0) = COPY $x4
+    %v:_(<2 x s8>) = G_LOAD %vptr:_(p0) :: (load 2, align 1)
+    %vc0:_(<2 x s8>) = COPY %v:_(<2 x s8>)
+    %vc1:_(<2 x s8>) = COPY %v:_(<2 x s8>)
+    %vc00:_(s8), %vc01:_(s8) = G_UNMERGE_VALUES %vc0:_(<2 x s8>)
+    %vc10:_(s8), %vc11:_(s8) = G_UNMERGE_VALUES %vc1:_(<2 x s8>)
+    %v0t:_(s8) = COPY %vc00:_(s8)
+    %v0:_(s8) = COPY %v0t:_(s8)
+    %v1t:_(s8) = COPY %vc11:_(s8)
+    %v1:_(s8) = COPY %v1t:_(s8)
+    %v0_zext:_(s32) = G_ZEXT %v0:_(s8)
+    %v1_sext:_(s32) = G_SEXT %v1:_(s8)
+    $w4 = COPY %v0_zext:_(s32)
+    $w5 = COPY %v1_sext:_(s32)
+)";
+  setUp(MIRString.rtrim(' '));
+  if (!TM)
+    return;
+
+  ALegalizerInfo LI(MF->getSubtarget());
+
+  Legalizer::MFResult Result =
+      Legalizer::legalizeMachineFunction(*MF, LI, {}, B);
+
+  EXPECT_TRUE(isNullMIPtr(Result.FailedOn));
+  EXPECT_TRUE(Result.Changed);
+
+  StringRef CheckString = R"(
+    CHECK: %vptr:_(p0) = COPY $x4
+    CHECK-NEXT: [[LOAD_0:%[0-9]+]]:_(s16) = G_LOAD %vptr:_(p0) :: (load 1)
+    CHECK-NEXT: [[OFFSET_1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+    CHECK-NEXT: [[VPTR_1:%[0-9]+]]:_(p0) = G_PTR_ADD %vptr:_, [[OFFSET_1]]:_(s64)
+    CHECK-NEXT: [[LOAD_1:%[0-9]+]]:_(s16) = G_LOAD [[VPTR_1]]:_(p0) :: (load 1)
+    CHECK-NEXT: [[FF_MASK:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+    CHECK-NEXT: [[V0_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_0]]:_(s16)
+    CHECK-NEXT: %v0_zext:_(s32) = G_AND [[V0_EXT]]:_, [[FF_MASK]]:_
+    CHECK-NEXT: [[V1_EXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD_1]]:_(s16)
+    CHECK-NEXT: [[SHAMNT:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+    CHECK-NEXT: [[V1_SHL:%[0-9]+]]:_(s32) = G_SHL [[V1_EXT]]:_, [[SHAMNT]]:_(s32)
+    CHECK-NEXT: %v1_sext:_(s32) = G_ASHR [[V1_SHL]]:_, [[SHAMNT]]:_(s32)
+    CHECK-NEXT: $w4 = COPY %v0_zext:_(s32)
+    CHECK-NEXT: $w5 = COPY %v1_sext:_(s32)
+)";
+
+  EXPECT_TRUE(CheckMachineFunction(*MF, CheckString)) << *MF;
+}
+
 } // namespace
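
---

Editor's note, not part of the patch: the heart of the change is the `UpdatedDefs` worklist that `tryCombineInstruction` drains after the per-opcode combine. Below is a minimal standalone C++ sketch of that discipline. `Reg`, `Inst`, `propagateUpdatedDefs`, and the linear user scan are hypothetical stand-ins for `Register`, `MachineInstr`, the loop at the end of `tryCombineInstruction`, and `MachineRegisterInfo::use_instructions()`; only the control flow mirrors the patch.

// Sketch of the UpdatedDefs worklist: re-enqueue every user of a redefined
// vreg that might now be combinable, looking through chains of COPYs.
#include <cstdio>
#include <string>
#include <vector>

using Reg = int;

struct Inst {
  std::string Opcode;    // e.g. "TRUNC", "COPY", "ANYEXT"
  Reg Def;               // register this instruction defines
  std::vector<Reg> Uses; // registers it reads
};

// Mirrors the "keep this list in sync with all artifact combines" switch.
static bool isArtifactOpcode(const std::string &Op) {
  return Op == "ANYEXT" || Op == "ZEXT" || Op == "SEXT" ||
         Op == "UNMERGE_VALUES" || Op == "EXTRACT" || Op == "TRUNC";
}

// Plays the role of the while (!UpdatedDefs.empty()) loop in the patch,
// where WrapperObserver.changedInstr(Use) puts Use back on the ArtifactList.
static void propagateUpdatedDefs(std::vector<Reg> UpdatedDefs,
                                 const std::vector<Inst> &Code,
                                 std::vector<const Inst *> &ArtifactList) {
  while (!UpdatedDefs.empty()) {
    Reg NewDef = UpdatedDefs.back();
    UpdatedDefs.pop_back();
    for (const Inst &Use : Code) {
      bool ReadsNewDef = false;
      for (Reg R : Use.Uses)
        ReadsNewDef |= (R == NewDef);
      if (!ReadsNewDef)
        continue;
      if (isArtifactOpcode(Use.Opcode))
        ArtifactList.push_back(&Use);   // revisit: may combine with NewDef
      else if (Use.Opcode == "COPY")
        UpdatedDefs.push_back(Use.Def); // keep walking through the copy
      // Any other opcode has no artifact combine; don't re-enqueue it.
    }
  }
}

int main() {
  // %1 = TRUNC %0 ; %2 = COPY %1 ; %3 = ANYEXT %2
  std::vector<Inst> Code = {
      {"TRUNC", 1, {0}}, {"COPY", 2, {1}}, {"ANYEXT", 3, {2}}};
  std::vector<const Inst *> ArtifactList;
  // Pretend %1 was just redefined by a combine: the ANYEXT, reachable only
  // through the COPY, must land back on the artifact worklist.
  propagateUpdatedDefs({1}, Code, ArtifactList);
  for (const Inst *I : ArtifactList)
    std::printf("re-enqueued %s defining %%%d\n", I->Opcode.c_str(), I->Def);
  return 0;
}

Because every pop either re-enqueues a combinable artifact or shortens the remaining COPY chain, the propagation terminates after a bounded number of visits per user, which is what the O(n) remark in the unit-test comment above refers to.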