Index: llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
===================================================================
--- llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -163,6 +163,26 @@
                    LLT PartTy, ArrayRef<Register> PartRegs,
                    LLT LeftoverTy = LLT(),
                    ArrayRef<Register> LeftoverRegs = {});
+
+  /// Unmerge \p SrcReg into pieces of the greatest common divisor type of
+  /// \p DstTy, \p NarrowTy, and the type of \p SrcReg, appending them to
+  /// \p Parts. Returns the GCD type.
+  LLT extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
+                     LLT NarrowTy, Register SrcReg);
+
+  /// Produce a merge of the values in \p VRegs to define \p DstReg. The merge
+  /// is performed at the least common multiple type, and converted as
+  /// appropriate to \p DstReg.
+  ///
+  /// \p VRegs should each have type \p GCDTy. This type should be the greatest
+  /// common divisor type of \p DstReg, \p NarrowTy, and an undetermined source
+  /// type.
+  ///
+  /// \p NarrowTy is the desired result merge source type. If the source values
+  /// do not evenly cover \p DstReg, high bits are inserted according to the
+  /// extension opcode \p PadStrategy.
+  void buildLCMMerge(Register DstReg, LLT NarrowTy, LLT GCDTy,
+                     SmallVectorImpl<Register> &VRegs,
+                     unsigned PadStrategy = TargetOpcode::G_ANYEXT);
+
   /// Perform generic multiplication of values held in multiple registers.
   /// Generated instructions use only types NarrowTy and i1.
   /// Destination can be same or two times size of the source.
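
A note on the two helpers above: for scalars they reduce to plain GCD/LCM arithmetic on bit widths. Below is a minimal standalone sketch of that arithmetic, worked for the s32 -> s160 case from the AMDGPU tests in this patch. Plain integers and std::gcd stand in for LLT and greatestCommonDivisor; this is an illustration only, not part of the patch or of the LLVM API.

// Standalone sketch (C++17) of the scalar width arithmetic behind
// extractGCDType/buildLCMMerge. Illustrative only; plain bit widths
// stand in for LLT.
#include <cassert>
#include <cstdio>
#include <numeric>

int main() {
  // G_ANYEXT s32 -> s160, narrowed with NarrowTy = s64.
  unsigned SrcSize = 32, DstSize = 160, NarrowSize = 64;

  // extractGCDType: GCD of the source, narrow, and destination widths.
  unsigned GCDSize = std::gcd(std::gcd(SrcSize, NarrowSize), DstSize);
  assert(GCDSize == 32);

  // buildLCMMerge: merge at the LCM of the destination and narrow widths,
  // then truncate back down to the destination.
  unsigned LCMSize = DstSize / std::gcd(DstSize, NarrowSize) * NarrowSize;
  assert(LCMSize == 320);

  // s320 is covered by 5 s64 pieces, each merged from 2 s32 sub-pieces;
  // one sub-piece is the source, the rest are padding.
  printf("parts=%u subparts=%u\n", LCMSize / NarrowSize, NarrowSize / GCDSize);
  return 0;
}

Compiled with -std=c++17 this prints parts=5 subparts=2, matching the five-operand s320 G_MERGE_VALUES followed by a G_TRUNC to s160 in test_anyext_s32_to_s160 below.
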
Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -63,6 +63,35 @@
   return std::make_pair(NumParts, NumLeftover);
 }
 
+static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
+  if (OrigTy.isVector() && TargetTy.isVector()) {
+    assert(OrigTy.getElementType() == TargetTy.getElementType());
+    int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+                                    TargetTy.getNumElements());
+    return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+  }
+
+  if (OrigTy.isVector() && !TargetTy.isVector()) {
+    assert(OrigTy.getElementType() == TargetTy);
+    return TargetTy;
+  }
+
+  assert(!OrigTy.isVector() && !TargetTy.isVector() &&
+         "GCD type of vector and scalar not implemented");
+
+  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
+                                  TargetTy.getSizeInBits());
+  return LLT::scalar(GCD);
+}
+
+static LLT getLCMType(LLT Ty0, LLT Ty1) {
+  assert(Ty0.isScalar() && Ty1.isScalar() && "not yet handled");
+  unsigned Mul = Ty0.getSizeInBits() * Ty1.getSizeInBits();
+  int GCDSize = greatestCommonDivisor(Ty0.getSizeInBits(),
+                                      Ty1.getSizeInBits());
+  return LLT::scalar(Mul / GCDSize);
+}
+
 LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                  GISelChangeObserver &Observer,
                                  MachineIRBuilder &Builder)
@@ -172,26 +201,6 @@
   return true;
 }
 
-static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
-  if (OrigTy.isVector() && TargetTy.isVector()) {
-    assert(OrigTy.getElementType() == TargetTy.getElementType());
-    int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
-                                    TargetTy.getNumElements());
-    return LLT::scalarOrVector(GCD, OrigTy.getElementType());
-  }
-
-  if (OrigTy.isVector() && !TargetTy.isVector()) {
-    assert(OrigTy.getElementType() == TargetTy);
-    return TargetTy;
-  }
-
-  assert(!OrigTy.isVector() && !TargetTy.isVector());
-
-  int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
-                                  TargetTy.getSizeInBits());
-  return LLT::scalar(GCD);
-}
-
 void LegalizerHelper::insertParts(Register DstReg, LLT ResultTy,
                                   LLT PartTy, ArrayRef<Register> PartRegs,
@@ -237,6 +246,133 @@
   }
 }
 
+/// Return the result registers of G_UNMERGE_VALUES \p MI in \p Regs
+static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
+                              const MachineInstr &MI) {
+  assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
+
+  const int NumResults = MI.getNumOperands() - 1;
+  Regs.resize(NumResults);
+  for (int I = 0; I != NumResults; ++I)
+    Regs[I] = MI.getOperand(I).getReg();
+}
+
+LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
+                                    LLT DstTy, LLT NarrowTy, Register SrcReg) {
+  LLT SrcTy = MRI.getType(SrcReg);
+
+  LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
+  if (SrcTy == GCDTy) {
+    // If the source is already the GCD type, there is nothing to split.
+    Parts.push_back(SrcReg);
+  } else {
+    // Need to split into common type sized pieces.
+    auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+    getUnmergeResults(Parts, *Unmerge);
+  }
+
+  return GCDTy;
+}
+
+void LegalizerHelper::buildLCMMerge(Register DstReg, LLT NarrowTy, LLT GCDTy,
+                                    SmallVectorImpl<Register> &VRegs,
+                                    unsigned PadStrategy) {
+  LLT DstTy = MRI.getType(DstReg);
+  LLT LCMTy = getLCMType(DstTy, NarrowTy);
+
+  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
+  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
+  int NumOrigSrc = VRegs.size();
+
+  Register PadReg;
+
+  // Get a value we can use to pad the source value if the sources won't evenly
+  // cover the result type.
+  if (NumOrigSrc < NumParts * NumSubParts) {
+    if (PadStrategy == TargetOpcode::G_ZEXT)
+      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
+    else if (PadStrategy == TargetOpcode::G_ANYEXT)
+      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+    else {
+      assert(PadStrategy == TargetOpcode::G_SEXT);
+
+      // Shift the sign bit of the low register through the high register.
+      auto ShiftAmt =
+          MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
+      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
+    }
+  }
+
+  // Registers for the final merge to be produced.
+  SmallVector<Register, 8> Remerge;
+  Remerge.resize(NumParts);
+
+  // Registers needed for intermediate merges, which will be merged into a
+  // source for Remerge.
+  SmallVector<Register, 8> SubMerge;
+  SubMerge.resize(NumSubParts);
+
+  // Once we've fully read off the end of the original source bits, we can
+  // reuse the same high bits for the remaining padding elements.
+  Register AllPadReg;
+
+  // Build merges to the LCM type to cover the original result type.
+  for (int I = 0; I != NumParts; ++I) {
+    bool AllMergePartsArePadding = true;
+
+    // Build the requested merges to the requested type.
+    for (int J = 0; J != NumSubParts; ++J) {
+      int Idx = I * NumSubParts + J;
+      if (Idx >= NumOrigSrc) {
+        SubMerge[J] = PadReg;
+        continue;
+      }
+
+      SubMerge[J] = VRegs[Idx];
+
+      // There are meaningful bits here we can't reuse later.
+      AllMergePartsArePadding = false;
+    }
+
+    // If we've filled up a complete piece with padding bits, we can directly
+    // emit the natural sized constant if applicable, rather than a merge of
+    // smaller constants.
+    if (AllMergePartsArePadding && !AllPadReg) {
+      if (PadStrategy == TargetOpcode::G_ANYEXT)
+        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
+      else if (PadStrategy == TargetOpcode::G_ZEXT)
+        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
+
+      // If this is a sign extension, we can't materialize a trivial constant
+      // with the right type and have to produce a merge.
+    }
+
+    if (AllPadReg) {
+      // Avoid creating additional instructions if we're just adding additional
+      // copies of padding bits.
+      Remerge[I] = AllPadReg;
+      continue;
+    }
+
+    if (NumSubParts == 1)
+      Remerge[I] = SubMerge[0];
+    else
+      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
+
+    // In the sign extend padding case, re-use the first all-signbit merge.
+    if (AllMergePartsArePadding && !AllPadReg)
+      AllPadReg = Remerge[I];
+  }
+
+  // Create the merge to the widened source, and extract the relevant bits
+  // into the result.
+  if (DstTy == LCMTy)
+    MIRBuilder.buildMerge(DstReg, Remerge);
+  else
+    MIRBuilder.buildTrunc(DstReg, MIRBuilder.buildMerge(LCMTy, Remerge));
+}
+
 static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
   switch (Opcode) {
   case TargetOpcode::G_SDIV:
@@ -3643,33 +3779,14 @@
   Register DstReg = MI.getOperand(0).getReg();
   Register SrcReg = MI.getOperand(1).getReg();
 
-  LLT DstTy = MRI.getType(DstReg);
-  LLT SrcTy = MRI.getType(SrcReg);
-  unsigned DstSize = DstTy.getSizeInBits();
-  unsigned SrcSize = SrcTy.getSizeInBits();
-  if (DstSize % SrcSize != 0)
+  LLT DstTy = MRI.getType(DstReg);
+  if (DstTy.isVector())
     return UnableToLegalize;
 
-  Register PadReg;
-  if (MI.getOpcode() == TargetOpcode::G_ZEXT)
-    PadReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
-  else if (MI.getOpcode() == TargetOpcode::G_ANYEXT)
-    PadReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
-  else {
-    // Shift the sign bit of the low register through the high register.
-    auto ShiftAmt =
-        MIRBuilder.buildConstant(LLT::scalar(64), SrcSize - 1);
-    PadReg = MIRBuilder.buildAShr(SrcTy, SrcReg, ShiftAmt).getReg(0);
-  }
-
-  // Generate a merge where the bottom bits are taken from the source, and
-  // zero/impdef/sign bit everything else.
-  unsigned NumParts = DstSize / SrcSize;
-  SmallVector<Register, 8> Srcs = {SrcReg};
-  for (unsigned Part = 1; Part < NumParts; ++Part)
-    Srcs.push_back(PadReg);
-  MIRBuilder.buildMerge(DstReg, Srcs);
+  SmallVector<Register, 8> Parts;
+  LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
+  buildLCMMerge(DstReg, NarrowTy, GCDTy, Parts, MI.getOpcode());
   MI.eraseFromParent();
   return Legalized;
 }
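
For intuition about the padding loop in buildLCMMerge, here is a standalone sketch of the piece bookkeeping for the G_ZEXT s32 -> s128 case exercised by the shift tests in this patch. Again plain integers and std::gcd stand in for LLT; the piece labels are illustrative, not MIR, and this code is not part of the patch.

// Standalone sketch (C++17) of buildLCMMerge's piece/padding layout for
// G_ZEXT s32 -> s128 with NarrowTy = s64. Illustrative only.
#include <cstdio>
#include <numeric>

int main() {
  unsigned SrcSize = 32, DstSize = 128, NarrowSize = 64;
  unsigned GCDSize = std::gcd(std::gcd(SrcSize, NarrowSize), DstSize);      // 32
  unsigned LCMSize = DstSize / std::gcd(DstSize, NarrowSize) * NarrowSize;  // 128

  int NumParts = LCMSize / NarrowSize;    // 2 operands of the final merge
  int NumSubParts = NarrowSize / GCDSize; // 2 GCD-sized pieces per operand
  int NumOrigSrc = 1;                     // a single s32 source piece

  // For G_ZEXT the pad piece is a zero constant; G_ANYEXT would use an
  // implicit def, and G_SEXT an ashr of the top source piece.
  for (int I = 0; I != NumParts; ++I) {
    printf("operand %d:", I);
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      printf(" %s", Idx < NumOrigSrc ? "src" : "pad");
    }
    printf("\n");
  }
  return 0;
}

This prints "operand 0: src pad" and "operand 1: pad pad". Operand 1 is all padding, which is why buildLCMMerge materializes AllPadReg once as a whole s64 zero (the new G_CONSTANT i64 0 in the updated checks below) instead of merging two s32 zeros.
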
Index: llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
===================================================================
--- llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
+++ llvm/test/CodeGen/AArch64/GlobalISel/legalize-sext-zext-128.mir
@@ -57,8 +57,10 @@
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
     ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
     ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
-    ; CHECK: G_STORE [[MV]](s128), [[COPY1]](p0) :: (store 16)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+    ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64)
+    ; CHECK: G_STORE [[MV1]](s128), [[COPY1]](p0) :: (store 16)
     ; CHECK: RET_ReallyLR
     %0:_(s32) = COPY $w0
     %1:_(p0) = COPY $x1
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
+++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-anyext.mir
@@ -283,13 +283,71 @@
     ; CHECK-LABEL: name: test_anyext_s32_to_s128
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32),
[[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s128) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s128) %0:_(s32) = COPY $vgpr0 %1:_(s128) = G_ANYEXT %0 S_ENDPGM 0, implicit %1 ... +--- +name: test_anyext_s32_to_s160 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_s32_to_s160 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160) + %0:_(s32) = COPY $vgpr0 + %1:_(s160) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_anyext_s32_to_s192 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_s32_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s192) + %0:_(s32) = COPY $vgpr0 + %1:_(s192) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_anyext_s32_to_s224 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_s32_to_s224 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224) + %0:_(s32) = COPY $vgpr0 + %1:_(s224) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... 
+
 ---
 name: test_anyext_s32_to_s256
 body: |
   bb.0:
     liveins: $vgpr0
@@ -299,8 +357,10 @@
     ; CHECK-LABEL: name: test_anyext_s32_to_s256
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s256)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256)
     %0:_(s32) = COPY $vgpr0
     %1:_(s256) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
@@ -315,13 +375,34 @@
     ; CHECK-LABEL: name: test_anyext_s32_to_s512
     ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
     ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-    ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
-    ; CHECK: S_ENDPGM 0, implicit [[MV]](s512)
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK: S_ENDPGM 0, implicit [[MV1]](s512)
     %0:_(s32) = COPY $vgpr0
     %1:_(s512) = G_ANYEXT %0
     S_ENDPGM 0, implicit %1
 ...
 
+---
+name: test_anyext_s32_to_s992
+body: |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_anyext_s32_to_s992
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32)
+    ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK: [[MV1:%[0-9]+]]:_(s1984) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64)
+    ; CHECK: [[TRUNC:%[0-9]+]]:_(s992) = G_TRUNC [[MV1]](s1984)
+    ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s992)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s992) = G_ANYEXT %0
+    S_ENDPGM 0, implicit %1
+...
+ --- name: test_anyext_s32_to_s1024 body: | @@ -331,8 +412,10 @@ ; CHECK-LABEL: name: test_anyext_s32_to_s1024 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s1024) %0:_(s32) = COPY $vgpr0 %1:_(s1024) = G_ANYEXT %0 S_ENDPGM 0, implicit %1 @@ -354,6 +437,22 @@ S_ENDPGM 0, implicit %1 ... +--- +name: test_anyext_s64_to_s192 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_anyext_s64_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s192) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s192) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_anyext_s64_to_s256 body: | @@ -402,16 +501,24 @@ S_ENDPGM 0, implicit %1 ... -# --- -# name: test_anyext_s96_to_s128 -# body: | -# bb.0: -# liveins: $vgpr0_vgpr1_vgpr2 +--- +name: test_anyext_s96_to_s128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 -# %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 -# %1:_(s128) = G_ANYEXT %0 -# S_ENDPGM 0, implicit %1 -# ... + ; CHECK-LABEL: name: test_anyext_s96_to_s128 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s128) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... --- name: test_anyext_s128_to_s256 @@ -421,10 +528,110 @@ ; CHECK-LABEL: name: test_anyext_s128_to_s256 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF - ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[DEF]](s128) + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[DEF]](s64), [[DEF]](s64) ; CHECK: S_ENDPGM 0, implicit [[MV]](s256) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s256) = G_ANYEXT %0 S_ENDPGM 0, implicit %1 ... 
+ +--- +name: test_anyext_s32_to_s88 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_s32_to_s88 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[OR]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR2]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[UV]](s16) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[COPY1]], [[C3]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[UV1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY2]], [[C3]] + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL3]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[UV2]](s16) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[COPY3]], [[C3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[UV3]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[COPY4]], [[C3]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[DEF]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[TRUNC]], [[C3]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) + ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL5]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[OR5]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C1]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL6]] + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[COPY5]](s16) + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C1]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL7]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC1]](s88) + %0:_(s32) = COPY $vgpr0 + %1:_(s88) = G_ANYEXT %0 + S_ENDPGM 0, implicit %1 +... + +# The instruction count blows up for this and takes too long to +# generate checks. This fails on a G_MERGE_VALUES to s4160 +# +# --- +# name: test_anyext_s32_to_s65 +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:_(s32) = COPY $vgpr0 +# %1:_(s65) = G_ANYEXT %0 +# S_ENDPGM 0, implicit %1 +# ... 
+ +--- +name: test_anyext_s2_to_s112 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_anyext_s2_to_s112 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C1]], [[SHL1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s112) = G_TRUNC [[MV1]](s448) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s112) + %0:_(s32) = COPY $vgpr0 + %1:_(s2) = G_TRUNC %0 + %2:_(s112) = G_ANYEXT %1 + S_ENDPGM 0, implicit %2 +... Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ashr.mir @@ -837,74 +837,80 @@ ; SI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[TRUNC]](s32) ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; VI-LABEL: name: test_ashr_s128_s128 ; VI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[TRUNC]](s32) ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32) ; VI: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_ashr_s128_s128 ; GFX9: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: 
[[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[TRUNC]](s32) ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C2]](s32) + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32) ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(s128) = G_ZEXT %1 @@ -1241,72 +1247,73 @@ ; SI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[TRUNC]](s32) ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) 
= G_OR [[LSHR]], [[SHL]] - ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) + ; SI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; SI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) ; SI: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; SI: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; SI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] + ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; SI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; SI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; SI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; SI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] + ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; SI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; SI: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; SI: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; SI: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) ; SI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI: 
[[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; SI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; SI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; SI: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; SI: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; SI: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; SI: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) + ; SI: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) ; SI: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; SI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; SI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1316,82 +1323,83 @@ ; SI: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; SI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] ; SI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) ; SI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] ; SI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; SI: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; SI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; VI-LABEL: name: test_ashr_s256_s256 ; VI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV2:%[0-9]+]]:_(s64), 
[[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[TRUNC]](s32) ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; VI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) + ; VI: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; VI: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) ; VI: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; VI: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; VI: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] + ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; VI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; VI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; VI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; VI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] + ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; VI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; VI: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; VI: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; VI: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) ; VI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; VI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; VI: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; VI: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; VI: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; VI: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) + ; VI: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) ; VI: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; VI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; VI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1401,82 +1409,83 @@ ; VI: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; VI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] ; VI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) ; VI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] ; VI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; VI: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; VI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; GFX9-LABEL: name: test_ashr_s256_s256 ; GFX9: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = 
G_TRUNC [[MV1]](s256) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[TRUNC]](s32) ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C3]](s32) + ; GFX9: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; GFX9: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[C4]](s32) ; GFX9: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV3]], [[SUB2]](s32) ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[ASHR2]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[ASHR]], [[ASHR1]] ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; GFX9: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV5]], [[SUB5]](s32) ; GFX9: [[OR1:%[0-9]+]]:_(s64) = G_OR [[LSHR2]], [[SHL1]] - ; GFX9: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; GFX9: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C4]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; GFX9: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - 
; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; GFX9: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; GFX9: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL3]] ; GFX9: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C4]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; GFX9: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) - ; GFX9: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C3]](s32) + ; GFX9: [[ASHR3:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) + ; GFX9: [[ASHR4:%[0-9]+]]:_(s64) = G_ASHR [[UV9]], [[C4]](s32) ; GFX9: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; GFX9: [[ASHR5:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB]](s32) ; GFX9: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV10]], [[SUB]](s32) ; GFX9: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV11]], [[SUB9]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL5]] - ; GFX9: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C3]](s32) + ; GFX9: [[ASHR6:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[C4]](s32) ; GFX9: [[ASHR7:%[0-9]+]]:_(s64) = G_ASHR [[UV11]], [[SUB8]](s32) ; GFX9: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[ASHR7]] ; GFX9: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV10]], [[SELECT9]] @@ -1486,12 +1495,12 @@ ; GFX9: [[UV12:%[0-9]+]]:_(s64), [[UV13:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; GFX9: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV12]], [[SELECT12]] ; GFX9: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV13]], [[SELECT13]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) ; GFX9: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[ASHR3]] ; GFX9: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[ASHR4]] - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; GFX9: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES 
[[SELECT16]](s64), [[SELECT17]](s64) + ; GFX9: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(s32) = COPY $vgpr8 %2:_(s256) = G_ZEXT %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-lshr.mir @@ -819,71 +819,74 @@ ; SI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; VI-LABEL: name: test_lshr_s128_s128 ; VI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB 
[[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_lshr_s128_s128 ; GFX9: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[TRUNC]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[TRUNC]](s32) ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV1]], [[SUB]](s32) ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[LSHR2]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C2]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[LSHR]], [[C1]] + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) + ; GFX9: 
$vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(s128) = G_ZEXT %1 @@ -1217,33 +1220,34 @@ ; SI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[TRUNC]](s32) ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] + ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) @@ -1252,26 +1256,26 @@ ; SI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI: 
[[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; SI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] + ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] ; SI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; SI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; SI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; SI: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; SI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; SI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] + ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; SI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; SI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; SI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; SI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; SI: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; SI: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) @@ -1280,49 +1284,50 @@ ; SI: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; SI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; SI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; SI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] + ; SI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] ; SI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; SI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; SI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; SI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; SI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; SI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; SI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY 
[[MV3]](s256) + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; SI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] + ; SI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; SI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; VI-LABEL: name: test_lshr_s256_s256 ; VI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[TRUNC]](s32) ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] + ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], 
[[TRUNC]] + ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) @@ -1331,26 +1336,26 @@ ; VI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; VI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] + ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] ; VI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; VI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; VI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; VI: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; VI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; VI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] + ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; VI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; VI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; VI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; VI: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; VI: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) @@ -1359,49 +1364,50 @@ ; VI: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; VI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; VI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; VI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] + ; VI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C1]] ; VI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; VI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; VI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; VI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; VI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; VI: 
[[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; VI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; VI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] + ; VI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; VI: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; VI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; GFX9-LABEL: name: test_lshr_s256_s256 ; GFX9: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[TRUNC]](s32) ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[TRUNC]](s32) ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[SUB3]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR1]], [[SHL]] - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV3]], [[SUB2]](s32) ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[LSHR2]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV2]], [[SELECT]] - ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C3]] + ; GFX9: 
[[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[LSHR]], [[C1]] ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[TRUNC]](s32) ; GFX9: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[TRUNC]](s32) @@ -1410,26 +1416,26 @@ ; GFX9: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR5]] ; GFX9: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C3]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR3]], [[C1]] ; GFX9: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; GFX9: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB1]](s32) ; GFX9: [[LSHR6:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9: [[SHL3:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[SUB1]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR6]], [[SHL3]] ; GFX9: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C3]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL2]], [[C1]] ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL4]] ; GFX9: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; GFX9: [[LSHR7:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB]](s32) ; GFX9: [[LSHR8:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB]](s32) @@ -1438,18 +1444,18 @@ ; GFX9: [[LSHR9:%[0-9]+]]:_(s64) = G_LSHR [[UV9]], [[SUB8]](s32) ; GFX9: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[LSHR9]] ; GFX9: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV8]], [[SELECT9]] - ; GFX9: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[LSHR7]], [[C3]] + ; GFX9: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT 
[[ICMP8]](s1), [[LSHR7]], [[C1]] ; GFX9: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT10]] ; GFX9: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; GFX9: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) ; GFX9: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT12]] ; GFX9: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT13]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) - ; GFX9: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C3]] - ; GFX9: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT14]](s64), [[SELECT15]](s64) + ; GFX9: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT1]], [[C1]] + ; GFX9: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; GFX9: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; GFX9: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(s32) = COPY $vgpr8 %2:_(s256) = G_ZEXT %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sext.mir @@ -338,13 +338,74 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s128) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128) %0:_(s32) = COPY $vgpr0 %1:_(s128) = G_SEXT %0 S_ENDPGM 0, implicit %1 ... +--- +name: test_sext_s32_to_s160 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_s32_to_s160 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV2]](s320) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160) + %0:_(s32) = COPY $vgpr0 + %1:_(s160) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_sext_s32_to_s192 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_s32_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s192) + %0:_(s32) = COPY $vgpr0 + %1:_(s192) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... + +--- +name: test_sext_s32_to_s224 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_s32_to_s224 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV2]](s448) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224) + %0:_(s32) = COPY $vgpr0 + %1:_(s224) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_sext_s32_to_s256 body: | @@ -355,8 +416,10 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s256) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s256) %0:_(s32) = COPY $vgpr0 %1:_(s256) = G_SEXT %0 S_ENDPGM 0, implicit %1 @@ -372,14 +435,37 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s512) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s512) %0:_(s32) = COPY $vgpr0 %1:_(s512) = G_SEXT %0 S_ENDPGM 0, implicit %1 ... 
--- +name: test_sext_s32_to_s992 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_s32_to_s992 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s1984) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s992) = G_TRUNC [[MV2]](s1984) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s992) + %0:_(s32) = COPY $vgpr0 + %1:_(s992) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... + +--- + name: test_sext_s32_to_s1024 body: | bb.0: @@ -389,8 +475,10 @@ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[COPY]], [[C]](s32) - ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32), [[ASHR]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[ASHR]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[ASHR]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s1024) %0:_(s32) = COPY $vgpr0 %1:_(s1024) = G_SEXT %0 S_ENDPGM 0, implicit %1 @@ -413,6 +501,23 @@ S_ENDPGM 0, implicit %1 ... +--- +name: test_sext_s64_to_s192 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_sext_s64_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 + ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[COPY]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[ASHR]](s64), [[ASHR]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s192) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s192) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_sext_s64_to_s256 body: | @@ -464,16 +569,25 @@ S_ENDPGM 0, implicit %1 ... -# --- -# name: test_sext_s96_to_s128 -# body: | -# bb.0: -# liveins: $vgpr0_vgpr1_vgpr2 +--- +name: test_sext_s96_to_s128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 -# %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 -# %1:_(s128) = G_SEXT %0 -# S_ENDPGM 0, implicit %1 -# ...
+ ; CHECK-LABEL: name: test_sext_s96_to_s128 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 31 + ; CHECK: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[UV2]], [[C]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[ASHR]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s128) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... --- name: test_sext_s128_to_s256 @@ -483,28 +597,140 @@ ; CHECK-LABEL: name: test_sext_s128_to_s256 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 - ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 127 - ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C]], [[C1]] - ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[C]] - ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[C]](s32), [[C1]] - ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[C]](s32), [[C2]] + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 ; CHECK: [[ASHR:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[SUB1]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL]] - ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 63 - ; CHECK: [[ASHR1:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[C3]](s32) - ; CHECK: [[ASHR2:%[0-9]+]]:_(s64) = G_ASHR [[UV1]], [[SUB]](s32) - ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[ASHR2]] - ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV]], [[SELECT]] - ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[ASHR]], [[ASHR1]] - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT1]](s64), [[SELECT2]](s64) - ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[MV]](s128) - ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256) + ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[ASHR]](s64), [[ASHR]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s256) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s256) = G_SEXT %0 S_ENDPGM 0, implicit %1 ... 
+ +--- +name: test_sext_s32_to_s88 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_sext_s32_to_s88 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[OR]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[UV3]](s16) + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[COPY1]], [[C4]](s16) + ; CHECK: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SHL3]], [[C4]](s16) + ; CHECK: [[ASHR1:%[0-9]+]]:_(s16) = G_ASHR [[ASHR]], [[C3]](s32) + ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[UV]](s16) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[COPY2]], [[C5]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[UV1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY3]], [[C5]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL4]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[UV2]](s16) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[COPY4]], [[C5]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[UV3]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[COPY5]], [[C5]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL5]] + ; CHECK: [[COPY6:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[COPY6]], [[C5]] + ; CHECK: [[COPY7:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND5:%[0-9]+]]:_(s16) = G_AND [[COPY7]], [[C5]] + ; CHECK: [[SHL6:%[0-9]+]]:_(s16) = G_SHL [[AND5]], [[C4]](s16) + ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL6]] + ; CHECK: [[COPY8:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND6:%[0-9]+]]:_(s16) = G_AND [[COPY8]], [[C5]] + ; CHECK: [[COPY9:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND7:%[0-9]+]]:_(s16) = G_AND [[COPY9]], [[C5]] + ; CHECK: [[SHL7:%[0-9]+]]:_(s16) = G_SHL [[AND7]], [[C4]](s16) + ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[AND6]], [[SHL7]] + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C1]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL8]] + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR6]](s16) + ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C1]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL9]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR7]](s32), [[OR8]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND8:%[0-9]+]]:_(s16) = G_AND [[COPY10]], [[C5]] + ; CHECK: [[COPY11:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND9:%[0-9]+]]:_(s16) = G_AND [[COPY11]], [[C5]] + ; CHECK: 
[[SHL10:%[0-9]+]]:_(s16) = G_SHL [[AND9]], [[C4]](s16) + ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[AND8]], [[SHL10]] + ; CHECK: [[COPY12:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND10:%[0-9]+]]:_(s16) = G_AND [[COPY12]], [[C5]] + ; CHECK: [[COPY13:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND11:%[0-9]+]]:_(s16) = G_AND [[COPY13]], [[C5]] + ; CHECK: [[SHL11:%[0-9]+]]:_(s16) = G_SHL [[AND11]], [[C4]](s16) + ; CHECK: [[OR10:%[0-9]+]]:_(s16) = G_OR [[AND10]], [[SHL11]] + ; CHECK: [[COPY14:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND12:%[0-9]+]]:_(s16) = G_AND [[COPY14]], [[C5]] + ; CHECK: [[COPY15:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND13:%[0-9]+]]:_(s16) = G_AND [[COPY15]], [[C5]] + ; CHECK: [[SHL12:%[0-9]+]]:_(s16) = G_SHL [[AND13]], [[C4]](s16) + ; CHECK: [[OR11:%[0-9]+]]:_(s16) = G_OR [[AND12]], [[SHL12]] + ; CHECK: [[COPY16:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND14:%[0-9]+]]:_(s16) = G_AND [[COPY16]], [[C5]] + ; CHECK: [[COPY17:%[0-9]+]]:_(s16) = COPY [[ASHR1]](s16) + ; CHECK: [[AND15:%[0-9]+]]:_(s16) = G_AND [[COPY17]], [[C5]] + ; CHECK: [[SHL13:%[0-9]+]]:_(s16) = G_SHL [[AND15]], [[C4]](s16) + ; CHECK: [[OR12:%[0-9]+]]:_(s16) = G_OR [[AND14]], [[SHL13]] + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR9]](s16) + ; CHECK: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[OR10]](s16) + ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[ZEXT6]], [[C1]](s32) + ; CHECK: [[OR13:%[0-9]+]]:_(s32) = G_OR [[ZEXT5]], [[SHL14]] + ; CHECK: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[OR11]](s16) + ; CHECK: [[ZEXT8:%[0-9]+]]:_(s32) = G_ZEXT [[OR12]](s16) + ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[ZEXT8]], [[C1]](s32) + ; CHECK: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT7]], [[SHL15]] + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR13]](s32), [[OR14]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64), [[MV1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s88) = G_TRUNC [[MV2]](s704) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s88) + %0:_(s32) = COPY $vgpr0 + %1:_(s88) = G_SEXT %0 + S_ENDPGM 0, implicit %1 +... + +# The instruction count blows up for this and takes too long to +# generate checks. This fails on a G_MERGE_VALUES to s4160 +# +# --- +# name: test_sext_s32_to_s65 +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:_(s32) = COPY $vgpr0 +# %1:_(s65) = G_SEXT %0 +# S_ENDPGM 0, implicit %1 +# ... + + +# This requires fixing a bug in merge/unmerge legalization. +# --- +# name: test_sext_s2_to_s112 +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:_(s32) = COPY $vgpr0 +# %1:_(s2) = G_TRUNC %0 +# %2:_(s112) = G_SEXT %1 +# S_ENDPGM 0, implicit %2 +# ... 
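+ +# (Re the s65 case above: gcd(65, 64) = 1, so the LCM type is s4160 = 65 * 64, +# i.e. a single G_MERGE_VALUES of 65 s64 pieces, which is where the +# instruction count explosion comes from.)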
Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-shl.mir @@ -878,71 +878,74 @@ ; SI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; SI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] + ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; VI-LABEL: name: test_shl_s128_s128 ; VI: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), 
[[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] + ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) ; GFX9-LABEL: name: test_shl_s128_s128 ; GFX9: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr4 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s128) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s128) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[TRUNC]](s32) ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV]], [[SUB1]](s32) ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV1]], [[TRUNC]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV]], [[SUB]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C2]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV1]](s128) + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT]](s64), [[SELECT2]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[MV2]](s128) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s32) = COPY $vgpr4 %2:_(s128) = G_ZEXT %1 @@ -1276,33 +1279,34 @@ ; SI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; SI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; 
SI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; SI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; SI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; SI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; SI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; SI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; SI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; SI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; SI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; SI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; SI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; SI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; SI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; SI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[TRUNC]](s32) ; SI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; SI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) ; SI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; SI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; SI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] + ; SI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] ; SI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; SI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; SI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; SI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; SI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; SI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; SI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; SI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; SI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) @@ -1311,77 +1315,78 @@ ; SI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; SI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; SI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] + ; SI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; SI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[UV1]](s128) - ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; SI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; SI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; SI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; SI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; SI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[TRUNC]](s32) ; SI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; SI: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[TRUNC]](s32) ; SI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; SI: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] + ; SI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] ; SI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; SI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; SI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; SI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; SI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; SI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; SI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; SI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; SI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; SI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; SI: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; SI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; SI: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; SI: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; SI: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; SI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] + ; SI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] ; SI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; SI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; SI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; SI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; SI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) + ; SI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] + ; SI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; SI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; SI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; SI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) ; SI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] ; SI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; SI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; SI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; SI: 
[[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; SI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; SI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; VI-LABEL: name: test_shl_s256_s256 ; VI: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; VI: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; VI: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; VI: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; VI: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; VI: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; VI: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; VI: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; VI: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; VI: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; VI: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; VI: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; VI: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; VI: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; VI: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; VI: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[TRUNC]](s32) ; VI: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; VI: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) ; VI: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; VI: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; VI: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] + ; VI: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] ; VI: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; VI: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; VI: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C2]] - ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; VI: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; VI: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; VI: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; VI: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; VI: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; VI: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) @@ 
-1390,77 +1395,78 @@ ; VI: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; VI: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; VI: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] + ; VI: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; VI: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; VI: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; VI: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; VI: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; VI: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; VI: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[TRUNC]](s32) ; VI: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; VI: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[TRUNC]](s32) ; VI: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; VI: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] + ; VI: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] ; VI: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; VI: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; VI: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; VI: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; VI: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; VI: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; VI: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; VI: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; VI: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; VI: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; VI: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; VI: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; VI: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; VI: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; VI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] + ; VI: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] ; VI: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; VI: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; VI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; VI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; VI: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) + ; VI: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] + ; VI: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; VI: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; VI: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; VI: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = 
G_UNMERGE_VALUES [[UV1]](s128) ; VI: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] ; VI: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; VI: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; VI: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; VI: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; VI: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; VI: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) ; GFX9-LABEL: name: test_shl_s256_s256 ; GFX9: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr8 ; GFX9: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX9: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s256) - ; GFX9: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 + ; GFX9: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[C]](s32) + ; GFX9: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; GFX9: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; GFX9: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV1]](s256) + ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 128 ; GFX9: [[UV:%[0-9]+]]:_(s128), [[UV1:%[0-9]+]]:_(s128) = G_UNMERGE_VALUES [[COPY]](s256) - ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C1]] - ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C1]], [[TRUNC]] - ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C1]] + ; GFX9: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] + ; GFX9: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] + ; GFX9: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] ; GFX9: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] - ; GFX9: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 + ; GFX9: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[TRUNC]](s32) ; GFX9: [[LSHR:%[0-9]+]]:_(s64) = G_LSHR [[UV2]], [[SUB3]](s32) ; GFX9: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[UV3]], [[TRUNC]](s32) ; GFX9: [[OR:%[0-9]+]]:_(s64) = G_OR [[LSHR]], [[SHL1]] - ; GFX9: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; GFX9: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[UV2]], [[SUB2]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C3]] + ; GFX9: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[SHL]], [[C1]] ; GFX9: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP2]](s1), [[OR]], [[SHL2]] ; GFX9: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP3]](s1), [[UV3]], [[SELECT1]] ; GFX9: [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], 
[[C2]] - ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB1]] - ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C2]] + ; GFX9: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[SUB1]], [[C3]] + ; GFX9: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB1]] + ; GFX9: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB1]](s32), [[C3]] ; GFX9: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB1]](s32), [[C]] ; GFX9: [[LSHR1:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB1]](s32) ; GFX9: [[LSHR2:%[0-9]+]]:_(s64) = G_LSHR [[UV4]], [[SUB1]](s32) @@ -1469,46 +1475,46 @@ ; GFX9: [[LSHR3:%[0-9]+]]:_(s64) = G_LSHR [[UV5]], [[SUB4]](s32) ; GFX9: [[SELECT3:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[OR1]], [[LSHR3]] ; GFX9: [[SELECT4:%[0-9]+]]:_(s64) = G_SELECT [[ICMP5]](s1), [[UV4]], [[SELECT3]] - ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C3]] + ; GFX9: [[SELECT5:%[0-9]+]]:_(s64) = G_SELECT [[ICMP4]](s1), [[LSHR1]], [[C1]] ; GFX9: [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) - ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C2]] - ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[TRUNC]] - ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C2]] + ; GFX9: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] + ; GFX9: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] + ; GFX9: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC]](s32), [[C3]] ; GFX9: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[TRUNC]](s32), [[C]] ; GFX9: [[SHL4:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[TRUNC]](s32) ; GFX9: [[LSHR4:%[0-9]+]]:_(s64) = G_LSHR [[UV6]], [[SUB7]](s32) ; GFX9: [[SHL5:%[0-9]+]]:_(s64) = G_SHL [[UV7]], [[TRUNC]](s32) ; GFX9: [[OR2:%[0-9]+]]:_(s64) = G_OR [[LSHR4]], [[SHL5]] ; GFX9: [[SHL6:%[0-9]+]]:_(s64) = G_SHL [[UV6]], [[SUB6]](s32) - ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C3]] + ; GFX9: [[SELECT6:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[SHL4]], [[C1]] ; GFX9: [[SELECT7:%[0-9]+]]:_(s64) = G_SELECT [[ICMP6]](s1), [[OR2]], [[SHL6]] ; GFX9: [[SELECT8:%[0-9]+]]:_(s64) = G_SELECT [[ICMP7]](s1), [[UV7]], [[SELECT7]] ; GFX9: [[OR3:%[0-9]+]]:_(s64) = G_OR [[SELECT4]], [[SELECT6]] ; GFX9: [[OR4:%[0-9]+]]:_(s64) = G_OR [[SELECT5]], [[SELECT8]] ; GFX9: [[UV8:%[0-9]+]]:_(s64), [[UV9:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV]](s128) - ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C2]] - ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C2]], [[SUB]] - ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C2]] + ; GFX9: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[SUB]], [[C3]] + ; GFX9: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[SUB]] + ; GFX9: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[SUB]](s32), [[C3]] ; GFX9: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[SUB]](s32), [[C]] ; GFX9: [[SHL7:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB]](s32) ; GFX9: [[LSHR5:%[0-9]+]]:_(s64) = G_LSHR [[UV8]], [[SUB9]](s32) ; GFX9: [[SHL8:%[0-9]+]]:_(s64) = G_SHL [[UV9]], [[SUB]](s32) ; GFX9: [[OR5:%[0-9]+]]:_(s64) = G_OR [[LSHR5]], [[SHL8]] ; GFX9: [[SHL9:%[0-9]+]]:_(s64) = G_SHL [[UV8]], [[SUB8]](s32) - ; GFX9: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C3]] + ; GFX9: [[SELECT9:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[SHL7]], [[C1]] ; GFX9: [[SELECT10:%[0-9]+]]:_(s64) = G_SELECT [[ICMP8]](s1), [[OR5]], [[SHL9]] ; GFX9: [[SELECT11:%[0-9]+]]:_(s64) = G_SELECT [[ICMP9]](s1), [[UV9]], [[SELECT10]] - ; GFX9: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C3]] - ; 
GFX9: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C3]] - ; GFX9: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) + ; GFX9: [[SELECT12:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT]], [[C1]] + ; GFX9: [[SELECT13:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SELECT2]], [[C1]] + ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT12]](s64), [[SELECT13]](s64) ; GFX9: [[SELECT14:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR3]], [[SELECT9]] ; GFX9: [[SELECT15:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR4]], [[SELECT11]] ; GFX9: [[UV10:%[0-9]+]]:_(s64), [[UV11:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[UV1]](s128) ; GFX9: [[SELECT16:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV10]], [[SELECT14]] ; GFX9: [[SELECT17:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV11]], [[SELECT15]] - ; GFX9: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) - ; GFX9: [[MV3:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV1]](s128), [[MV2]](s128) - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV3]](s256) + ; GFX9: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[SELECT16]](s64), [[SELECT17]](s64) + ; GFX9: [[MV4:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV2]](s128), [[MV3]](s128) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[MV4]](s256) %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 %1:_(s32) = COPY $vgpr8 %2:_(s256) = G_ZEXT %1 Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -332,15 +332,15 @@ ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) - ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[MV]], [[C]] - ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[MV1]], [[C1]] - ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[DEF1]], [[C1]] + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[AND]](s64), [[AND1]](s64) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 15 - ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C1]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C2]](s64), [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV2]](s128) ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 64 - ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV2]](s128) + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128) ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[TRUNC]], [[C3]] ; CHECK: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC]] ; CHECK: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 @@ -354,12 +354,12 @@ ; CHECK: [[SELECT:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[SHL]], [[C1]] ; CHECK: [[SELECT1:%[0-9]+]]:_(s64) = G_SELECT [[ICMP]](s1), [[OR]], [[SHL2]] ; CHECK: [[SELECT2:%[0-9]+]]:_(s64) = G_SELECT [[ICMP1]](s1), [[UV1]], [[SELECT1]] - ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV2]](s128) + ; CHECK: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[MV1]](s128) 
; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[UV2]], [[SELECT]] ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[UV3]], [[SELECT2]] ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 30 - ; CHECK: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C5]](s64), [[C1]](s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) + ; CHECK: [[MV3:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C5]](s64), [[C1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s128) ; CHECK: [[SUB2:%[0-9]+]]:_(s32) = G_SUB [[TRUNC1]], [[C3]] ; CHECK: [[SUB3:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC1]] ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC1]](s32), [[C3]] @@ -375,8 +375,8 @@ ; CHECK: [[OR4:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SELECT3]] ; CHECK: [[OR5:%[0-9]+]]:_(s64) = G_OR [[OR2]], [[SELECT5]] ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 45 - ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C6]](s64), [[C1]](s64) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) + ; CHECK: [[MV4:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C6]](s64), [[C1]](s64) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[MV4]](s128) ; CHECK: [[SUB4:%[0-9]+]]:_(s32) = G_SUB [[TRUNC2]], [[C3]] ; CHECK: [[SUB5:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC2]] ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC2]](s32), [[C3]] @@ -392,8 +392,8 @@ ; CHECK: [[OR7:%[0-9]+]]:_(s64) = G_OR [[OR4]], [[SELECT6]] ; CHECK: [[OR8:%[0-9]+]]:_(s64) = G_OR [[OR5]], [[SELECT8]] ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 - ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C1]](s64) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) + ; CHECK: [[MV5:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C7]](s64), [[C1]](s64) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s32) = G_TRUNC [[MV5]](s128) ; CHECK: [[SUB6:%[0-9]+]]:_(s32) = G_SUB [[TRUNC3]], [[C3]] ; CHECK: [[SUB7:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC3]] ; CHECK: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC3]](s32), [[C3]] @@ -409,8 +409,8 @@ ; CHECK: [[OR10:%[0-9]+]]:_(s64) = G_OR [[OR7]], [[SELECT9]] ; CHECK: [[OR11:%[0-9]+]]:_(s64) = G_OR [[OR8]], [[SELECT11]] ; CHECK: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 75 - ; CHECK: [[MV7:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C8]](s64), [[C1]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV7]](s128) + ; CHECK: [[MV6:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C8]](s64), [[C1]](s64) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s32) = G_TRUNC [[MV6]](s128) ; CHECK: [[SUB8:%[0-9]+]]:_(s32) = G_SUB [[TRUNC4]], [[C3]] ; CHECK: [[SUB9:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC4]] ; CHECK: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC4]](s32), [[C3]] @@ -426,8 +426,8 @@ ; CHECK: [[OR13:%[0-9]+]]:_(s64) = G_OR [[OR10]], [[SELECT12]] ; CHECK: [[OR14:%[0-9]+]]:_(s64) = G_OR [[OR11]], [[SELECT14]] ; CHECK: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 90 - ; CHECK: [[MV8:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C9]](s64), [[C1]](s64) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV8]](s128) + ; CHECK: [[MV7:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C9]](s64), [[C1]](s64) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s32) = G_TRUNC [[MV7]](s128) ; CHECK: [[SUB10:%[0-9]+]]:_(s32) = G_SUB [[TRUNC5]], [[C3]] ; CHECK: [[SUB11:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC5]] ; CHECK: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC5]](s32), [[C3]] @@ -443,8 +443,8 @@ ; CHECK: [[OR16:%[0-9]+]]:_(s64) = G_OR [[OR13]], [[SELECT15]] ; CHECK: [[OR17:%[0-9]+]]:_(s64) = G_OR [[OR14]], [[SELECT17]] ; CHECK: [[C10:%[0-9]+]]:_(s64) = G_CONSTANT i64 105 - ; CHECK: 
[[MV9:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C10]](s64), [[C1]](s64) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV9]](s128) + ; CHECK: [[MV8:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C10]](s64), [[C1]](s64) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s32) = G_TRUNC [[MV8]](s128) ; CHECK: [[SUB12:%[0-9]+]]:_(s32) = G_SUB [[TRUNC6]], [[C3]] ; CHECK: [[SUB13:%[0-9]+]]:_(s32) = G_SUB [[C3]], [[TRUNC6]] ; CHECK: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[TRUNC6]](s32), [[C3]] Index: llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir +++ llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-zext.mir @@ -1,5 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 -o - %s | FileCheck %s --- name: test_zext_s32_to_s64 @@ -323,13 +323,72 @@ ; CHECK-LABEL: name: test_zext_s32_to_s128 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s128) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s128) %0:_(s32) = COPY $vgpr0 %1:_(s128) = G_ZEXT %0 S_ENDPGM 0, implicit %1 ... +--- +name: test_zext_s32_to_s160 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s32_to_s160 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s320) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s160) = G_TRUNC [[MV1]](s320) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s160) + %0:_(s32) = COPY $vgpr0 + %1:_(s160) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... + + +--- +name: test_zext_s32_to_s192 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s32_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s192) + %0:_(s32) = COPY $vgpr0 + %1:_(s192) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... 
+ +--- +name: test_zext_s32_to_s224 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s32_to_s224 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s224) = G_TRUNC [[MV1]](s448) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s224) + %0:_(s32) = COPY $vgpr0 + %1:_(s224) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_zext_s32_to_s256 body: | @@ -339,8 +398,10 @@ ; CHECK-LABEL: name: test_zext_s32_to_s256 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s256) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256) %0:_(s32) = COPY $vgpr0 %1:_(s256) = G_ZEXT %0 S_ENDPGM 0, implicit %1 @@ -355,13 +416,34 @@ ; CHECK-LABEL: name: test_zext_s32_to_s512 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s512) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s512) %0:_(s32) = COPY $vgpr0 %1:_(s512) = G_ZEXT %0 S_ENDPGM 0, implicit %1 ... +--- +name: test_zext_s32_to_s992 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s32_to_s992 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s1984) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s992) = G_TRUNC [[MV1]](s1984) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s992) + %0:_(s32) = COPY $vgpr0 + %1:_(s992) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +...
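+# Note: results whose width is not a multiple of 64 (s160, s224, s992 above)
+# are first built at the least common multiple with s64 (s320, s448, s1984)
+# and then truncated back to the requested width.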
+ --- name: test_zext_s32_to_s1024 body: | @@ -371,8 +453,10 @@ ; CHECK-LABEL: name: test_zext_s32_to_s1024 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32) - ; CHECK: S_ENDPGM 0, implicit [[MV]](s1024) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[C]](s32) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[MV]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64), [[C1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV1]](s1024) %0:_(s32) = COPY $vgpr0 %1:_(s1024) = G_ZEXT %0 S_ENDPGM 0, implicit %1 @@ -394,6 +478,22 @@ S_ENDPGM 0, implicit %1 ... +--- +name: test_zext_s64_to_s192 +body: | + bb.0: + liveins: $vgpr0_vgpr1 + + ; CHECK-LABEL: name: test_zext_s64_to_s192 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[C]](s64), [[C]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s192) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s192) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... + --- name: test_zext_s64_to_s256 body: | @@ -442,16 +542,24 @@ S_ENDPGM 0, implicit %1 ... -# --- -# name: test_zext_s96_to_s128 -# body: | -# bb.0: -# liveins: $vgpr0_vgpr1_vgpr2 +--- +name: test_zext_s96_to_s128 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 -# %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 -# %1:_(s128) = G_ZEXT %0 -# S_ENDPGM 0, implicit %1 -# ... + ; CHECK-LABEL: name: test_zext_s96_to_s128 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[C]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV2]](s128) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s128) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... --- name: test_zext_s128_to_s256 @@ -461,11 +569,133 @@ ; CHECK-LABEL: name: test_zext_s128_to_s256 ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s128) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 - ; CHECK: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[C]](s64), [[C]](s64) - ; CHECK: [[MV1:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[COPY]](s128), [[MV]](s128) - ; CHECK: S_ENDPGM 0, implicit [[MV1]](s256) + ; CHECK: [[MV:%[0-9]+]]:_(s256) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[C]](s64), [[C]](s64) + ; CHECK: S_ENDPGM 0, implicit [[MV]](s256) %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(s256) = G_ZEXT %0 S_ENDPGM 0, implicit %1 ... 
+ +--- +name: test_zext_s32_to_s88 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s32_to_s88 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[ZEXT]], [[C]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s64) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[OR]], [[C1]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s64) = G_OR [[OR]], [[SHL1]] + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 24 + ; CHECK: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[OR1]], [[C2]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s64) = G_OR [[OR1]], [[SHL2]] + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[OR2]](s64) + ; CHECK: [[C3:%[0-9]+]]:_(s16) = G_CONSTANT i16 255 + ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[UV]](s16) + ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[COPY1]], [[C3]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s16) = COPY [[UV1]](s16) + ; CHECK: [[AND1:%[0-9]+]]:_(s16) = G_AND [[COPY2]], [[C3]] + ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; CHECK: [[SHL3:%[0-9]+]]:_(s16) = G_SHL [[AND1]], [[C4]](s16) + ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[AND]], [[SHL3]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s16) = COPY [[UV2]](s16) + ; CHECK: [[AND2:%[0-9]+]]:_(s16) = G_AND [[COPY3]], [[C3]] + ; CHECK: [[COPY4:%[0-9]+]]:_(s16) = COPY [[UV3]](s16) + ; CHECK: [[AND3:%[0-9]+]]:_(s16) = G_AND [[COPY4]], [[C3]] + ; CHECK: [[SHL4:%[0-9]+]]:_(s16) = G_SHL [[AND3]], [[C4]](s16) + ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[AND2]], [[SHL4]] + ; CHECK: [[C5:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 + ; CHECK: [[AND4:%[0-9]+]]:_(s16) = G_AND [[C5]], [[C3]] + ; CHECK: [[SHL5:%[0-9]+]]:_(s16) = G_SHL [[AND4]], [[C4]](s16) + ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[AND4]], [[SHL5]] + ; CHECK: [[COPY5:%[0-9]+]]:_(s16) = COPY [[OR5]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR3]](s16) + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR4]](s16) + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[ZEXT2]], [[C1]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT1]], [[SHL6]] + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR5]](s16) + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[COPY5]](s16) + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[ZEXT4]], [[C1]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[ZEXT3]], [[SHL7]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[MV1:%[0-9]+]]:_(s704) = G_MERGE_VALUES [[MV]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64), [[C6]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s88) = G_TRUNC [[MV1]](s704) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s88) + %0:_(s32) = COPY $vgpr0 + %1:_(s88) = G_ZEXT %0 + S_ENDPGM 0, implicit %1 +... + +# The instruction count blows up for this and takes too long to +# generate checks. This fails on a G_MERGE_VALUES to s4160 +# +# --- +# name: test_zext_s32_to_s65 +# body: | +# bb.0: +# liveins: $vgpr0 + +# %0:_(s32) = COPY $vgpr0 +# %1:_(s65) = G_ZEXT %0 +# S_ENDPGM 0, implicit %1 +# ... 
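+# (gcd(65, 64) = 1, so the merge type would be LCM(65, 64) = 4160 bits,
+# i.e. a 65-operand G_MERGE_VALUES of s64 pieces.)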
+ +--- +name: test_zext_s2_to_s112 +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: test_zext_s2_to_s112 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s48) = G_TRUNC [[C1]](s64) + ; CHECK: [[DEF:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF + ; CHECK: [[COPY1:%[0-9]+]]:_(s128) = COPY [[DEF]](s128) + ; CHECK: [[INSERT:%[0-9]+]]:_(s128) = G_INSERT [[COPY1]], [[C]](s64), 0 + ; CHECK: [[COPY2:%[0-9]+]]:_(s128) = COPY [[INSERT]](s128) + ; CHECK: [[INSERT1:%[0-9]+]]:_(s128) = G_INSERT [[COPY2]], [[TRUNC]](s48), 64 + ; CHECK: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](s32) + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UV]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UV1]](s16) + ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C2]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C3]], [[C2]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[C3]], [[SHL1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s448) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s128) = G_TRUNC [[MV1]](s448) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[TRUNC1]](s128), 0 + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s48) = G_EXTRACT [[TRUNC1]](s128), 64 + ; CHECK: [[COPY3:%[0-9]+]]:_(s128) = COPY [[INSERT1]](s128) + ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s64) = G_EXTRACT [[COPY3]](s128), 0 + ; CHECK: [[EXTRACT3:%[0-9]+]]:_(s48) = G_EXTRACT [[COPY3]](s128), 64 + ; CHECK: [[AND:%[0-9]+]]:_(s64) = G_AND [[EXTRACT]], [[EXTRACT2]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT1]](s48) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[EXTRACT3]](s48) + ; CHECK: [[AND1:%[0-9]+]]:_(s64) = G_AND [[ANYEXT]], [[ANYEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s48) = G_TRUNC [[AND1]](s64) + ; CHECK: [[DEF2:%[0-9]+]]:_(s128) = G_IMPLICIT_DEF + ; CHECK: [[COPY4:%[0-9]+]]:_(s128) = COPY [[DEF2]](s128) + ; CHECK: [[INSERT2:%[0-9]+]]:_(s128) = G_INSERT [[COPY4]], [[AND]](s64), 0 + ; CHECK: [[COPY5:%[0-9]+]]:_(s128) = COPY [[INSERT2]](s128) + ; CHECK: [[INSERT3:%[0-9]+]]:_(s128) = G_INSERT [[COPY5]], [[TRUNC2]](s48), 64 + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s112) = G_TRUNC [[INSERT3]](s128) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC3]](s112) + %0:_(s32) = COPY $vgpr0 + %1:_(s2) = G_TRUNC %0 + %2:_(s112) = G_ZEXT %1 + S_ENDPGM 0, implicit %2 +...
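+# The pad pieces in the s2 case above are G_IMPLICIT_DEF rather than zero
+# constants: the low 2 bits are produced by ANDing with the mask built from
+# G_CONSTANT i64 3, so the undef padding is masked out of the final value.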