diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -23,6 +23,7 @@
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
 #include "llvm/Support/Debug.h"
 
 #define DEBUG_TYPE "legalizer"
@@ -545,7 +546,9 @@
     MachineIRBuilder &MIB;
     const LegalizerInfo &LI;
 
-  private:
+    // Stores the best register found in the current query so far.
+    Register CurrentBest = Register();
+
     /// Given an concat_vector op \p Concat and a start bit and size, try to
     /// find the origin of the value defined by that start position and size.
     ///
@@ -567,15 +570,15 @@
       // FIXME: we might be able return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
-        return Register();
+        return CurrentBest;
 
-      // If the bits exactly cover a single source, then return the operand as
-      // our value reg.
       Register SrcReg = Concat.getReg(StartSrcIdx);
-      if (InRegOffset == 0 && Size == SrcSize)
-        return SrcReg; // A source operand matches exactly.
+      if (InRegOffset == 0 && Size == SrcSize) {
+        CurrentBest = SrcReg;
+        return findValueFromDefImpl(SrcReg, 0, Size);
+      }
 
-      return findValueFromDef(SrcReg, InRegOffset, Size);
+      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
     }
 
     /// Given an build_vector op \p BV and a start bit and size, try to find
@@ -597,17 +600,21 @@
      unsigned InRegOffset = StartBit % SrcSize;
 
      if (InRegOffset != 0)
-        return Register(); // Give up, bits don't start at a scalar source.
+        return CurrentBest; // Give up, bits don't start at a scalar source.
 
      if (Size < SrcSize)
-        return Register(); // Scalar source is too large for requested bits.
+        return CurrentBest; // Scalar source is too large for requested bits.
 
      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
-          return Register(); // Isn't covered exactly by sources.
+          return CurrentBest; // Isn't covered exactly by sources.
 
        unsigned NumSrcsUsed = Size / SrcSize;
+        // If we're requesting all of the sources, just return this def.
+        if (NumSrcsUsed == BV.getNumSources())
+          return BV.getReg(0);
+
        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);
@@ -615,7 +622,7 @@
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
-          return Register();
+          return CurrentBest;
 
        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
@@ -686,28 +693,25 @@
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
-        return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
-        return findValueFromDef(SrcRegToUse, NewStartBit, Size);
+        if (NewStartBit == 0 &&
+            Size == MRI.getType(SrcRegToUse).getSizeInBits())
+          CurrentBest = SrcRegToUse;
+        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }
 
-  public:
-    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
-                        const LegalizerInfo &Info)
-        : MRI(Mri), MIB(Builder), LI(Info) {}
-
-    /// Try to find a source of the value defined in the def \p DefReg, starting
-    /// at position \p StartBit with size \p Size.
-    /// \returns an empty Register if no value could be found, or \p DefReg if
-    /// if that was the best we could do.
-    Register findValueFromDef(Register DefReg, unsigned StartBit,
-                              unsigned Size) {
+    /// Internal implementation for findValueFromDef(). findValueFromDef()
+    /// initializes some data like the CurrentBest register, which this method
+    /// and its callees rely upon.
+    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
+                                  unsigned Size) {
      MachineInstr *Def = getDefIgnoringCopies(DefReg, MRI);
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however with multiple defs, we need to compute the offset
@@ -725,7 +729,7 @@
      }
      Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
      Register SrcOriginReg =
-          findValueFromDef(SrcReg, StartBit + DefStartBit, Size);
+          findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
      if (SrcOriginReg)
        return SrcOriginReg;
      // Failed to find a further value. If the StartBit and Size perfectly
@@ -733,7 +737,7 @@
      // nothing.
      if (StartBit == 0 && Size == DefSize)
        return DefReg;
-      return Register();
+      return CurrentBest;
    }
    case TargetOpcode::G_BUILD_VECTOR:
      return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
@@ -741,10 +745,26 @@
    case TargetOpcode::G_INSERT:
      return findValueFromInsert(*Def, StartBit, Size);
    default:
-      return Register();
+      return CurrentBest;
    }
  }
 
+  public:
+    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
+                        const LegalizerInfo &Info)
+        : MRI(Mri), MIB(Builder), LI(Info) {}
+
+    /// Try to find a source of the value defined in the def \p DefReg, starting
+    /// at position \p StartBit with size \p Size.
+    /// \returns a register with the requested size, or an empty Register if no
+    /// better value could be found.
+    Register findValueFromDef(Register DefReg, unsigned StartBit,
+                              unsigned Size) {
+      CurrentBest = Register();
+      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
+      return FoundReg != DefReg ? FoundReg : Register();
+    }
+
    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provides the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
@@ -760,9 +780,8 @@
          DeadDefs[DefIdx] = true;
          continue;
        }
-        Register FoundVal =
-            findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
-        if (!FoundVal || FoundVal == DefReg)
+        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
+        if (!FoundVal)
          continue;
        if (MRI.getType(FoundVal) != DestTy)
          continue;
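[Note, not part of the patch: the shape of the header change above is that findValueFromDef() becomes a thin public wrapper which clears the CurrentBest member before each query and only reports a register that improves on the one it started from, while findValueFromDefImpl() and the findValueFrom*() helpers return CurrentBest instead of an empty Register on every bail-out path. A minimal standalone C++ sketch of that wrapper/impl pattern follows; BestValueFinder and Node are invented names for illustration only, not LLVM APIs.]

  // Illustrative sketch only: mirrors the wrapper/impl split used by
  // ArtifactValueFinder after this change, not the actual LLVM code.
  #include <cstdio>

  struct Node {
    const Node *Src;  // single def we can look through (nullptr = none)
    unsigned Size;    // size in bits of the value this node defines
  };

  class BestValueFinder {
    const Node *CurrentBest = nullptr; // best exactly-covering def seen so far

    // Private worker (plays the role of findValueFromDefImpl): every
    // bail-out path returns CurrentBest rather than an empty result.
    const Node *findImpl(const Node *Def, unsigned StartBit, unsigned Size) {
      if (!Def)
        return CurrentBest;
      if (StartBit == 0 && Size == Def->Size)
        CurrentBest = Def;  // exact cover: remember it, keep digging deeper
      if (!Def->Src)
        return CurrentBest; // nothing further to look through
      return findImpl(Def->Src, StartBit, Size);
    }

  public:
    // Public entry point (plays the role of findValueFromDef): reset the
    // per-query state, then only report a result that improves on Def.
    const Node *findValue(const Node *Def, unsigned StartBit, unsigned Size) {
      CurrentBest = nullptr;
      const Node *Found = findImpl(Def, StartBit, Size);
      return Found != Def ? Found : nullptr;
    }
  };

  int main() {
    Node A{nullptr, 32}, B{&A, 32}, C{&B, 32};
    BestValueFinder F;
    // Looking through C (defined from B, defined from A) finds A, the
    // deepest def that exactly covers the queried 32 bits.
    std::printf("found A: %d\n", F.findValue(&C, 0, 32) == &A);
    return 0;
  }

The same "best so far" fallback is what lets the unmerge-combining helper in the last header hunk drop its explicit FoundVal == DefReg check: the wrapper already returns an empty Register when no better value was found.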
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-inserts.mir
@@ -37,14 +37,10 @@
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
     ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[COPY1]](s64), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s32)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s32), 0
     ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY3]](s64)
-    ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](s64)
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
     ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32)
-    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32)
+    ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s32), [[DEF]](s32)
     ; CHECK: $x0 = COPY [[MV]](s64)
     ; CHECK: $x1 = COPY [[MV1]](s64)
     %0:_(s64) = COPY $x0
@@ -69,11 +65,7 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
     ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
     ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-    ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s64)
     ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY [[COPY]](s64)
-    ; CHECK: [[EXTRACT:%[0-9]+]]:_(s1) = G_EXTRACT [[COPY1]](s64), 0
-    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT]](s1)
-    ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[ANYEXT]], [[TRUNC]](s1), 0
     ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8), [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8), [[UV6:%[0-9]+]]:_(s8), [[UV7:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY3]](s64)
     ; CHECK: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s8)
@@ -201,27 +193,12 @@
     ; CHECK: [[LSHR54:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT54]], [[C11]](s64)
     ; CHECK: [[ZEXT55:%[0-9]+]]:_(s32) = G_ZEXT [[UV7]](s8)
     ; CHECK: [[LSHR55:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT55]], [[C13]](s64)
-    ; CHECK: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8), [[UV12:%[0-9]+]]:_(s8), [[UV13:%[0-9]+]]:_(s8), [[UV14:%[0-9]+]]:_(s8), [[UV15:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[INSERT]](s64)
-    ; CHECK: [[ZEXT56:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR56:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT56]], [[C1]](s64)
-    ; CHECK: [[ZEXT57:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR57:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT57]], [[C3]](s64)
-    ; CHECK: [[ZEXT58:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR58:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT58]], [[C5]](s64)
-    ; CHECK: [[ZEXT59:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR59:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT59]], [[C7]](s64)
-    ; CHECK: [[ZEXT60:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR60:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT60]], [[C9]](s64)
-    ; CHECK: [[ZEXT61:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR61:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT61]], [[C11]](s64)
-    ; CHECK: [[ZEXT62:%[0-9]+]]:_(s32) = G_ZEXT [[UV8]](s8)
-    ; CHECK: [[LSHR62:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT62]], [[C13]](s64)
     ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
     ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32)
     ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C14]]
     ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
-    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
-    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C14]]
+    ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8)
+    ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C14]]
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[SHL]](s32)
     ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[COPY5]]
     ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32)
@@ -260,12 +237,12 @@
     ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[OR5]](s32)
     ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[SHL6]](s32)
     ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY22]], [[COPY23]]
-    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR6]](s32)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR6]](s32)
     ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32)
     ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY24]], [[C14]]
     ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[C1]](s64)
-    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
-    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C14]]
+    ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8)
+    ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C14]]
     ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[SHL7]](s32)
     ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[COPY25]]
     ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32)
@@ -304,12 +281,12 @@
     ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[OR12]](s32)
     ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[SHL13]](s32)
     ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[COPY42]], [[COPY43]]
-    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR13]](s32)
+    ; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[OR13]](s32)
     ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32)
     ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY44]], [[C14]]
     ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND16]], [[C1]](s64)
-    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
-    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C14]]
+    ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8)
+    ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C14]]
     ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[SHL14]](s32)
     ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[AND17]], [[COPY45]]
     ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32)
@@ -348,12 +325,12 @@
     ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[OR19]](s32)
     ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[SHL20]](s32)
     ; CHECK: [[OR20:%[0-9]+]]:_(s32) = G_OR [[COPY62]], [[COPY63]]
-    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR20]](s32)
+    ; CHECK: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[OR20]](s32)
     ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[LSHR21]](s32)
     ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY64]], [[C14]]
     ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[C1]](s64)
-    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
-    ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C14]]
+    ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8)
+    ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C14]]
     ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[SHL21]](s32)
     ; CHECK: [[OR21:%[0-9]+]]:_(s32) = G_OR [[AND25]], [[COPY65]]
     ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[LSHR22]](s32)
@@ -392,12 +369,12 @@
     ; CHECK: [[COPY82:%[0-9]+]]:_(s32) = COPY [[OR26]](s32)
     ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[SHL27]](s32)
     ; CHECK: [[OR27:%[0-9]+]]:_(s32) = G_OR [[COPY82]], [[COPY83]]
-    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR27]](s32)
+    ; CHECK: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR27]](s32)
     ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[LSHR28]](s32)
     ; CHECK: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY84]], [[C14]]
     ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND32]], [[C1]](s64)
-    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
-    ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C14]]
+    ; CHECK: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[UV4]](s8)
+    ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C14]]
     ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[SHL28]](s32)
     ; CHECK: [[OR28:%[0-9]+]]:_(s32) = G_OR [[AND33]], [[COPY85]]
     ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[LSHR29]](s32)
@@ -436,12 +413,12 @@
     ; CHECK: [[COPY102:%[0-9]+]]:_(s32) = COPY [[OR33]](s32)
     ; CHECK: [[COPY103:%[0-9]+]]:_(s32) = COPY [[SHL34]](s32)
     ; CHECK: [[OR34:%[0-9]+]]:_(s32) = G_OR [[COPY102]], [[COPY103]]
-    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR34]](s32)
+    ; CHECK: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[OR34]](s32)
     ; CHECK: [[COPY104:%[0-9]+]]:_(s32) = COPY [[LSHR35]](s32)
     ; CHECK: [[AND40:%[0-9]+]]:_(s32) = G_AND [[COPY104]], [[C14]]
     ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND40]], [[C1]](s64)
-    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
-    ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C14]]
+    ; CHECK: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[UV5]](s8)
+    ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C14]]
     ; CHECK: [[COPY105:%[0-9]+]]:_(s32) = COPY [[SHL35]](s32)
     ; CHECK: [[OR35:%[0-9]+]]:_(s32) = G_OR [[AND41]], [[COPY105]]
     ; CHECK: [[COPY106:%[0-9]+]]:_(s32) = COPY [[LSHR36]](s32)
@@ -480,12 +457,12 @@
     ; CHECK: [[COPY122:%[0-9]+]]:_(s32) = COPY [[OR40]](s32)
     ; CHECK: [[COPY123:%[0-9]+]]:_(s32) = COPY [[SHL41]](s32)
     ; CHECK: [[OR41:%[0-9]+]]:_(s32) = G_OR [[COPY122]], [[COPY123]]
-    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[OR41]](s32)
+    ; CHECK: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[OR41]](s32)
     ; CHECK: [[COPY124:%[0-9]+]]:_(s32) = COPY [[LSHR42]](s32)
     ; CHECK: [[AND48:%[0-9]+]]:_(s32) = G_AND [[COPY124]], [[C14]]
     ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND48]], [[C1]](s64)
-    ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
-    ; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C14]]
+    ; CHECK: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[UV6]](s8)
+    ; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C14]]
     ; CHECK: [[COPY125:%[0-9]+]]:_(s32) = COPY [[SHL42]](s32)
     ; CHECK: [[OR42:%[0-9]+]]:_(s32) = G_OR [[AND49]], [[COPY125]]
     ; CHECK: [[COPY126:%[0-9]+]]:_(s32) = COPY [[LSHR43]](s32)
@@ -524,12 +501,12 @@
     ; CHECK: [[COPY142:%[0-9]+]]:_(s32) = COPY [[OR47]](s32)
     ; CHECK: [[COPY143:%[0-9]+]]:_(s32) = COPY [[SHL48]](s32)
     ; CHECK: [[OR48:%[0-9]+]]:_(s32) = G_OR [[COPY142]], [[COPY143]]
-    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[OR48]](s32)
+    ; CHECK: [[TRUNC6:%[0-9]+]]:_(s8) = G_TRUNC [[OR48]](s32)
     ; CHECK: [[COPY144:%[0-9]+]]:_(s32) = COPY [[LSHR49]](s32)
     ; CHECK: [[AND56:%[0-9]+]]:_(s32) = G_AND [[COPY144]], [[C14]]
     ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND56]], [[C1]](s64)
-    ; CHECK: [[ANYEXT8:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
-    ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[ANYEXT8]], [[C14]]
+    ; CHECK: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[UV7]](s8)
+    ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C14]]
     ; CHECK: [[COPY145:%[0-9]+]]:_(s32) = COPY [[SHL49]](s32)
     ; CHECK: [[OR49:%[0-9]+]]:_(s32) = G_OR [[AND57]], [[COPY145]]
     ; CHECK: [[COPY146:%[0-9]+]]:_(s32) = COPY [[LSHR50]](s32)
@@ -568,13 +545,13 @@
     ; CHECK: [[COPY162:%[0-9]+]]:_(s32) = COPY [[OR54]](s32)
     ; CHECK: [[COPY163:%[0-9]+]]:_(s32) = COPY [[SHL55]](s32)
     ; CHECK: [[OR55:%[0-9]+]]:_(s32) = G_OR [[COPY162]], [[COPY163]]
-    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32)
-    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8), [[TRUNC8]](s8)
+    ; CHECK: [[TRUNC7:%[0-9]+]]:_(s8) = G_TRUNC [[OR55]](s32)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[TRUNC6]](s8), [[TRUNC7]](s8)
     ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
     ; CHECK: [[COPY164:%[0-9]+]]:_(s32) = COPY [[C15]](s32)
     ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[COPY164]], [[C1]](s64)
-    ; CHECK: [[ANYEXT9:%[0-9]+]]:_(s32) = G_ANYEXT [[UV8]](s8)
-    ; CHECK: [[AND64:%[0-9]+]]:_(s32) = G_AND [[ANYEXT9]], [[C14]]
+    ; CHECK: [[TRUNC8:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64)
+    ; CHECK: [[AND64:%[0-9]+]]:_(s32) = G_AND [[TRUNC8]], [[C14]]
     ; CHECK: [[COPY165:%[0-9]+]]:_(s32) = COPY [[SHL56]](s32)
     ; CHECK: [[OR56:%[0-9]+]]:_(s32) = G_OR [[AND64]], [[COPY165]]
     ; CHECK: [[COPY166:%[0-9]+]]:_(s32) = COPY [[C15]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/bug-legalization-artifact-combiner-dead-def.mir
@@ -34,13 +34,10 @@
     ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
     ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
     ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
-    ; GFX10: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
     ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE [[UV1]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -70,13 +67,10 @@
     ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
     ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
     ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
-    ; GFX10: %deaf_def:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
     ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2
@@ -110,14 +104,11 @@
     ; GFX10: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 96
     ; GFX10: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[LOAD]](<4 x s32>), 64
     ; GFX10: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; GFX10: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[EXTRACT1]](s32), [[DEF]](s32)
-    ; GFX10: [[INSERT:%[0-9]+]]:_(<2 x s32>) = G_INSERT [[BUILD_VECTOR]], [[EXTRACT]](s32), 32
-    ; GFX10: %dbg_use:_(s32), %11:_(s32) = G_UNMERGE_VALUES [[INSERT]](<2 x s32>)
-    ; GFX10: DBG_VALUE %dbg_use(s32), $noreg
+    ; GFX10: DBG_VALUE $noreg, $noreg, <0x600002a96c80>, !DIExpression(), debug-location !DILocation(line: 1, column: 1, scope: <0x60000229c040>)
     ; GFX10: G_STORE [[EXTRACT1]](s32), [[COPY]](p5) :: (store (s32), align 8, addrspace 5)
     ; GFX10: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
     ; GFX10: [[PTR_ADD:%[0-9]+]]:_(p5) = G_PTR_ADD [[COPY]], [[C]](s32)
-    ; GFX10: G_STORE %11(s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
+    ; GFX10: G_STORE [[EXTRACT]](s32), [[PTR_ADD]](p5) :: (store (s32) into unknown-address + 4, addrspace 5)
     %0:_(p5) = COPY $vgpr0
     %1:_(s32) = COPY $vgpr1
     %2:_(s32) = COPY $vgpr2